1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=ALL,AVX512F 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=ALL,AVX512BW 4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=ALL,AVX512DQ 5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi | FileCheck %s --check-prefixes=ALL,AVX512VBMI 6 7define <64 x i8> @shuffle_v64i8_02_03_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u(<64 x i8> %a) { 8; ALL-LABEL: shuffle_v64i8_02_03_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u: 9; ALL: # %bb.0: 10; ALL-NEXT: vpsrld $16, %xmm0, %xmm0 11; ALL-NEXT: retq 12 %b = shufflevector <64 x i8> %a, <64 x i8> poison, <64 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 13 ret <64 x i8> %b 14} 15 16define <64 x i8> @shuffle_v64i8_zz_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_zz_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_zz_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_zz_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62(<64 x i8> %a, <64 x i8> %b) { 17; AVX512F-LABEL: shuffle_v64i8_zz_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_zz_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_zz_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_zz_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62: 18; AVX512F: # %bb.0: 19; AVX512F-NEXT: vpslldq {{.*#+}} ymm1 = zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30] 20; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 21; AVX512F-NEXT: vpslldq {{.*#+}} ymm0 = zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30] 22; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 23; AVX512F-NEXT: retq 24; 25; AVX512BW-LABEL: shuffle_v64i8_zz_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_zz_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_zz_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_zz_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62: 26; AVX512BW: # %bb.0: 27; AVX512BW-NEXT: vpslldq {{.*#+}} zmm0 = zero,zmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero,zmm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30],zero,zmm0[32,33,34,35,36,37,38,39,40,41,42,43,44,45,46],zero,zmm0[48,49,50,51,52,53,54,55,56,57,58,59,60,61,62] 28; AVX512BW-NEXT: retq 29; 30; AVX512DQ-LABEL: shuffle_v64i8_zz_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_zz_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_zz_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_zz_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62: 31; AVX512DQ: # %bb.0: 32; AVX512DQ-NEXT: vpslldq {{.*#+}} ymm1 = zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30] 33; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm0 34; AVX512DQ-NEXT: vpslldq {{.*#+}} ymm0 = zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30] 35; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 36; AVX512DQ-NEXT: retq 37; 38; AVX512VBMI-LABEL: shuffle_v64i8_zz_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_zz_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_zz_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_zz_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62: 39; AVX512VBMI: # %bb.0: 40; AVX512VBMI-NEXT: vpslldq {{.*#+}} zmm0 = zero,zmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero,zmm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30],zero,zmm0[32,33,34,35,36,37,38,39,40,41,42,43,44,45,46],zero,zmm0[48,49,50,51,52,53,54,55,56,57,58,59,60,61,62] 41; AVX512VBMI-NEXT: retq 42 %shuffle = shufflevector <64 x i8> %a, <64 x i8> zeroinitializer, <64 x i32> <i32 79, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 95, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 111, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 127, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62> 43 ret <64 x i8> %shuffle 44} 45 46define <64 x i8> @shuffle_v64i8_02_03_04_05_06_07_08_09_10_11_12_13_14_15_zz_zz_18_19_20_21_22_23_24_25_26_27_28_29_30_31_zz_zz_34_35_36_37_38_39_40_41_42_43_44_45_46_47_zz_zz_50_51_52_53_54_55_56_57_58_59_60_61_62_63_zz_zz(<64 x i8> %a, <64 x i8> %b) { 47; AVX512F-LABEL: shuffle_v64i8_02_03_04_05_06_07_08_09_10_11_12_13_14_15_zz_zz_18_19_20_21_22_23_24_25_26_27_28_29_30_31_zz_zz_34_35_36_37_38_39_40_41_42_43_44_45_46_47_zz_zz_50_51_52_53_54_55_56_57_58_59_60_61_62_63_zz_zz: 48; AVX512F: # %bb.0: 49; AVX512F-NEXT: vpsrldq {{.*#+}} ymm1 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero 50; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 51; AVX512F-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero 52; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 53; AVX512F-NEXT: retq 54; 55; AVX512BW-LABEL: shuffle_v64i8_02_03_04_05_06_07_08_09_10_11_12_13_14_15_zz_zz_18_19_20_21_22_23_24_25_26_27_28_29_30_31_zz_zz_34_35_36_37_38_39_40_41_42_43_44_45_46_47_zz_zz_50_51_52_53_54_55_56_57_58_59_60_61_62_63_zz_zz: 56; AVX512BW: # %bb.0: 57; AVX512BW-NEXT: vpsrldq {{.*#+}} zmm0 = zmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zmm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zmm0[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zero,zero,zmm0[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zero,zero 58; AVX512BW-NEXT: retq 59; 60; AVX512DQ-LABEL: shuffle_v64i8_02_03_04_05_06_07_08_09_10_11_12_13_14_15_zz_zz_18_19_20_21_22_23_24_25_26_27_28_29_30_31_zz_zz_34_35_36_37_38_39_40_41_42_43_44_45_46_47_zz_zz_50_51_52_53_54_55_56_57_58_59_60_61_62_63_zz_zz: 61; AVX512DQ: # %bb.0: 62; AVX512DQ-NEXT: vpsrldq {{.*#+}} ymm1 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero 63; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm0 64; AVX512DQ-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero 65; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 66; AVX512DQ-NEXT: retq 67; 68; AVX512VBMI-LABEL: shuffle_v64i8_02_03_04_05_06_07_08_09_10_11_12_13_14_15_zz_zz_18_19_20_21_22_23_24_25_26_27_28_29_30_31_zz_zz_34_35_36_37_38_39_40_41_42_43_44_45_46_47_zz_zz_50_51_52_53_54_55_56_57_58_59_60_61_62_63_zz_zz: 69; AVX512VBMI: # %bb.0: 70; AVX512VBMI-NEXT: vpsrldq {{.*#+}} zmm0 = zmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zmm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zmm0[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zero,zero,zmm0[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zero,zero 71; AVX512VBMI-NEXT: retq 72 %shuffle = shufflevector <64 x i8> %a, <64 x i8> zeroinitializer, <64 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 64, i32 64, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 64, i32 64, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 64, i32 64, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 64, i32 64> 73 ret <64 x i8> %shuffle 74} 75 76define <64 x i8> @shuffle_v64i8_79_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_95_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_111_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_127_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62(<64 x i8> %a, <64 x i8> %b) { 77; AVX512F-LABEL: shuffle_v64i8_79_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_95_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_111_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_127_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62: 78; AVX512F: # %bb.0: 79; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2 80; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3 81; AVX512F-NEXT: vpalignr {{.*#+}} ymm2 = ymm2[15],ymm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm2[31],ymm3[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30] 82; AVX512F-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30] 83; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 84; AVX512F-NEXT: retq 85; 86; AVX512BW-LABEL: shuffle_v64i8_79_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_95_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_111_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_127_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62: 87; AVX512BW: # %bb.0: 88; AVX512BW-NEXT: vpalignr {{.*#+}} zmm0 = zmm1[15],zmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zmm1[31],zmm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30],zmm1[47],zmm0[32,33,34,35,36,37,38,39,40,41,42,43,44,45,46],zmm1[63],zmm0[48,49,50,51,52,53,54,55,56,57,58,59,60,61,62] 89; AVX512BW-NEXT: retq 90; 91; AVX512DQ-LABEL: shuffle_v64i8_79_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_95_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_111_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_127_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62: 92; AVX512DQ: # %bb.0: 93; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm2 94; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm3 95; AVX512DQ-NEXT: vpalignr {{.*#+}} ymm2 = ymm2[15],ymm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm2[31],ymm3[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30] 96; AVX512DQ-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30] 97; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 98; AVX512DQ-NEXT: retq 99; 100; AVX512VBMI-LABEL: shuffle_v64i8_79_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_95_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_111_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_127_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62: 101; AVX512VBMI: # %bb.0: 102; AVX512VBMI-NEXT: vpalignr {{.*#+}} zmm0 = zmm1[15],zmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zmm1[31],zmm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30],zmm1[47],zmm0[32,33,34,35,36,37,38,39,40,41,42,43,44,45,46],zmm1[63],zmm0[48,49,50,51,52,53,54,55,56,57,58,59,60,61,62] 103; AVX512VBMI-NEXT: retq 104 %shuffle = shufflevector <64 x i8> %a, <64 x i8> %b, <64 x i32> <i32 79, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 95, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 111, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 127, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62> 105 ret <64 x i8> %shuffle 106} 107 108 109define <64 x i8> @shuffle_v64i8_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz(<64 x i8> %a) { 110; AVX512F-LABEL: shuffle_v64i8_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz: 111; AVX512F: # %bb.0: 112; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm1 = [255,0] 113; AVX512F-NEXT: vpandq %zmm1, %zmm0, %zmm0 114; AVX512F-NEXT: retq 115; 116; AVX512BW-LABEL: shuffle_v64i8_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz: 117; AVX512BW: # %bb.0: 118; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = [255,0] 119; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 120; AVX512BW-NEXT: retq 121; 122; AVX512DQ-LABEL: shuffle_v64i8_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz: 123; AVX512DQ: # %bb.0: 124; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = [255,0] 125; AVX512DQ-NEXT: vandps %zmm1, %zmm0, %zmm0 126; AVX512DQ-NEXT: retq 127; 128; AVX512VBMI-LABEL: shuffle_v64i8_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz: 129; AVX512VBMI: # %bb.0: 130; AVX512VBMI-NEXT: vpmovzxbq {{.*#+}} xmm1 = [255,0] 131; AVX512VBMI-NEXT: vpandq %zmm1, %zmm0, %zmm0 132; AVX512VBMI-NEXT: retq 133 %shuffle = shufflevector <64 x i8> %a, <64 x i8> zeroinitializer, <64 x i32> <i32 0, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64> 134 ret <64 x i8> %shuffle 135} 136 137define <64 x i8> @shuffle_v64i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<64 x i8> %a, <64 x i8> %b) { 138; AVX512F-LABEL: shuffle_v64i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 139; AVX512F: # %bb.0: 140; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0 141; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 142; AVX512F-NEXT: retq 143; 144; AVX512BW-LABEL: shuffle_v64i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 145; AVX512BW: # %bb.0: 146; AVX512BW-NEXT: vpbroadcastb %xmm0, %zmm0 147; AVX512BW-NEXT: retq 148; 149; AVX512DQ-LABEL: shuffle_v64i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 150; AVX512DQ: # %bb.0: 151; AVX512DQ-NEXT: vpbroadcastb %xmm0, %ymm0 152; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 153; AVX512DQ-NEXT: retq 154; 155; AVX512VBMI-LABEL: shuffle_v64i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 156; AVX512VBMI: # %bb.0: 157; AVX512VBMI-NEXT: vpbroadcastb %xmm0, %zmm0 158; AVX512VBMI-NEXT: retq 159 %shuffle = shufflevector <64 x i8> %a, <64 x i8> %b, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 160 ret <64 x i8> %shuffle 161} 162 163define <64 x i8> @shuffle_v64i8_63_62_61_60_59_58_57_56_55_54_53_52_51_50_49_48_47_46_45_44_43_42_41_40_39_38_37_36_35_34_33_32_31_30_29_28_27_26_25_24_23_22_21_20_19_18_17_16_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00(<64 x i8> %a) { 164; AVX512F-LABEL: shuffle_v64i8_63_62_61_60_59_58_57_56_55_54_53_52_51_50_49_48_47_46_45_44_43_42_41_40_39_38_37_36_35_34_33_32_31_30_29_28_27_26_25_24_23_22_21_20_19_18_17_16_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00: 165; AVX512F: # %bb.0: 166; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0] 167; AVX512F-NEXT: # ymm1 = mem[0,1,0,1] 168; AVX512F-NEXT: vpshufb %ymm1, %ymm0, %ymm2 169; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 170; AVX512F-NEXT: vpshufb %ymm1, %ymm0, %ymm0 171; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 172; AVX512F-NEXT: vpermq {{.*#+}} zmm0 = zmm0[2,3,0,1,6,7,4,5] 173; AVX512F-NEXT: retq 174; 175; AVX512BW-LABEL: shuffle_v64i8_63_62_61_60_59_58_57_56_55_54_53_52_51_50_49_48_47_46_45_44_43_42_41_40_39_38_37_36_35_34_33_32_31_30_29_28_27_26_25_24_23_22_21_20_19_18_17_16_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00: 176; AVX512BW: # %bb.0: 177; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,47,46,45,44,43,42,41,40,39,38,37,36,35,34,33,32,63,62,61,60,59,58,57,56,55,54,53,52,51,50,49,48] 178; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[6,7,4,5,2,3,0,1] 179; AVX512BW-NEXT: retq 180; 181; AVX512DQ-LABEL: shuffle_v64i8_63_62_61_60_59_58_57_56_55_54_53_52_51_50_49_48_47_46_45_44_43_42_41_40_39_38_37_36_35_34_33_32_31_30_29_28_27_26_25_24_23_22_21_20_19_18_17_16_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00: 182; AVX512DQ: # %bb.0: 183; AVX512DQ-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0] 184; AVX512DQ-NEXT: # ymm1 = mem[0,1,0,1] 185; AVX512DQ-NEXT: vpshufb %ymm1, %ymm0, %ymm2 186; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm0 187; AVX512DQ-NEXT: vpshufb %ymm1, %ymm0, %ymm0 188; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 189; AVX512DQ-NEXT: vpermq {{.*#+}} zmm0 = zmm0[2,3,0,1,6,7,4,5] 190; AVX512DQ-NEXT: retq 191; 192; AVX512VBMI-LABEL: shuffle_v64i8_63_62_61_60_59_58_57_56_55_54_53_52_51_50_49_48_47_46_45_44_43_42_41_40_39_38_37_36_35_34_33_32_31_30_29_28_27_26_25_24_23_22_21_20_19_18_17_16_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00: 193; AVX512VBMI: # %bb.0: 194; AVX512VBMI-NEXT: vmovdqa64 {{.*#+}} zmm1 = [63,62,61,60,59,58,57,56,55,54,53,52,51,50,49,48,47,46,45,44,43,42,41,40,39,38,37,36,35,34,33,32,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0] 195; AVX512VBMI-NEXT: vpermb %zmm0, %zmm1, %zmm0 196; AVX512VBMI-NEXT: retq 197 %shuffle = shufflevector <64 x i8> %a, <64 x i8> poison, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 198 ret <64 x i8> %shuffle 199} 200 201; PR44379 202define <64 x i8> @shuffle_v64i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_18_19_20_21_22_23_16_17_26_27_28_29_30_31_24_25_34_35_36_37_38_39_32_33_42_43_44_45_46_47_40_41_50_51_52_53_54_55_48_49_58_59_60_61_62_63_56_57(<64 x i8> %a) { 203; ALL-LABEL: shuffle_v64i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_18_19_20_21_22_23_16_17_26_27_28_29_30_31_24_25_34_35_36_37_38_39_32_33_42_43_44_45_46_47_40_41_50_51_52_53_54_55_48_49_58_59_60_61_62_63_56_57: 204; ALL: # %bb.0: 205; ALL-NEXT: vprolq $48, %zmm0, %zmm0 206; ALL-NEXT: retq 207 %shuffle = shufflevector <64 x i8> %a, <64 x i8> poison, <64 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 16, i32 17, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 24, i32 25, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 32, i32 33, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 40, i32 41, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 48, i32 49, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 56, i32 57> 208 ret <64 x i8> %shuffle 209} 210 211; PR54658 212define <64 x i8> @shuffle_v64i8_01_03_02_05_07_06_09_11_10_13_15_14_17_19_18_21_23_22_25_27_26_29_31_30_33_35_34_37_39_38_41_43_42_45_47_46_49_51_50_53_55_54_57_59_58_61_63_62_01_03_02_05_01_03_02_05_01_03_02_05_01_03_02_05(<64 x i8> %a) { 213; AVX512F-LABEL: shuffle_v64i8_01_03_02_05_07_06_09_11_10_13_15_14_17_19_18_21_23_22_25_27_26_29_31_30_33_35_34_37_39_38_41_43_42_45_47_46_49_51_50_53_55_54_57_59_58_61_63_62_01_03_02_05_01_03_02_05_01_03_02_05_01_03_02_05: 214; AVX512F: # %bb.0: 215; AVX512F-NEXT: vpmovsxbd {{.*#+}} ymm1 = [0,1,2,4,5,6,22,23] 216; AVX512F-NEXT: vpshufb {{.*#+}} ymm2 = ymm0[1,3,2,5,7,6,9,11,10,13,15,14,u,u,u,u,17,19,18,21,23,22,25,27,26,29,31,30,u,u,u,u] 217; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3 218; AVX512F-NEXT: vpshufb {{.*#+}} xmm4 = xmm3[u,u,u,u,u,u,u,u,1,3,2,5,7,6,9,11] 219; AVX512F-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm4 220; AVX512F-NEXT: vpermt2d %zmm4, %zmm1, %zmm2 221; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm3[2,3,4,5,6,7] 222; AVX512F-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[10,13,15,14,1,3,2,5,u,u,u,u,u,u,u,u,26,29,31,30,17,19,18,21,23,22,25,27,u,u,u,u] 223; AVX512F-NEXT: vpmovsxbd {{.*#+}} ymm1 = [0,5,6,4,1,1,1,1] 224; AVX512F-NEXT: vpermd %ymm0, %ymm1, %ymm0 225; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0 226; AVX512F-NEXT: retq 227; 228; AVX512BW-LABEL: shuffle_v64i8_01_03_02_05_07_06_09_11_10_13_15_14_17_19_18_21_23_22_25_27_26_29_31_30_33_35_34_37_39_38_41_43_42_45_47_46_49_51_50_53_55_54_57_59_58_61_63_62_01_03_02_05_01_03_02_05_01_03_02_05_01_03_02_05: 229; AVX512BW: # %bb.0: 230; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[1,3,2,5,7,6,9,11,10,13,15,14,u,u,u,u,17,19,18,21,23,22,25,27,26,29,31,30,u,u,u,u,33,35,34,37,39,38,41,43,42,45,47,46,u,u,u,u,49,51,50,53,55,54,57,59,58,61,63,62,u,u,u,u] 231; AVX512BW-NEXT: vpmovsxbd {{.*#+}} zmm1 = [0,1,2,4,5,6,8,9,10,12,13,14,0,0,0,0] 232; AVX512BW-NEXT: vpermd %zmm0, %zmm1, %zmm0 233; AVX512BW-NEXT: retq 234; 235; AVX512DQ-LABEL: shuffle_v64i8_01_03_02_05_07_06_09_11_10_13_15_14_17_19_18_21_23_22_25_27_26_29_31_30_33_35_34_37_39_38_41_43_42_45_47_46_49_51_50_53_55_54_57_59_58_61_63_62_01_03_02_05_01_03_02_05_01_03_02_05_01_03_02_05: 236; AVX512DQ: # %bb.0: 237; AVX512DQ-NEXT: vpmovsxbd {{.*#+}} ymm1 = [0,1,2,4,5,6,22,23] 238; AVX512DQ-NEXT: vpshufb {{.*#+}} ymm2 = ymm0[1,3,2,5,7,6,9,11,10,13,15,14,u,u,u,u,17,19,18,21,23,22,25,27,26,29,31,30,u,u,u,u] 239; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm3 240; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm4 = xmm3[u,u,u,u,u,u,u,u,1,3,2,5,7,6,9,11] 241; AVX512DQ-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm4 242; AVX512DQ-NEXT: vpermt2d %zmm4, %zmm1, %zmm2 243; AVX512DQ-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm3[2,3,4,5,6,7] 244; AVX512DQ-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[10,13,15,14,1,3,2,5,u,u,u,u,u,u,u,u,26,29,31,30,17,19,18,21,23,22,25,27,u,u,u,u] 245; AVX512DQ-NEXT: vpmovsxbd {{.*#+}} ymm1 = [0,5,6,4,1,1,1,1] 246; AVX512DQ-NEXT: vpermd %ymm0, %ymm1, %ymm0 247; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0 248; AVX512DQ-NEXT: retq 249; 250; AVX512VBMI-LABEL: shuffle_v64i8_01_03_02_05_07_06_09_11_10_13_15_14_17_19_18_21_23_22_25_27_26_29_31_30_33_35_34_37_39_38_41_43_42_45_47_46_49_51_50_53_55_54_57_59_58_61_63_62_01_03_02_05_01_03_02_05_01_03_02_05_01_03_02_05: 251; AVX512VBMI: # %bb.0: 252; AVX512VBMI-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,3,2,5,7,6,9,11,10,13,15,14,17,19,18,21,23,22,25,27,26,29,31,30,33,35,34,37,39,38,41,43,42,45,47,46,49,51,50,53,55,54,57,59,58,61,63,62,1,3,2,5,1,3,2,5,1,3,2,5,1,3,2,5] 253; AVX512VBMI-NEXT: vpermb %zmm0, %zmm1, %zmm0 254; AVX512VBMI-NEXT: retq 255 %shuffle = shufflevector <64 x i8> %a, <64 x i8> poison, <64 x i32> <i32 1, i32 3, i32 2, i32 5, i32 7, i32 6, i32 9, i32 11, i32 10, i32 13, i32 15, i32 14, i32 17, i32 19, i32 18, i32 21, i32 23, i32 22, i32 25, i32 27, i32 26, i32 29, i32 31, i32 30, i32 33, i32 35, i32 34, i32 37, i32 39, i32 38, i32 41, i32 43, i32 42, i32 45, i32 47, i32 46, i32 49, i32 51, i32 50, i32 53, i32 55, i32 54, i32 57, i32 59, i32 58, i32 61, i32 63, i32 62, i32 1, i32 3, i32 2, i32 5, i32 1, i32 3, i32 2, i32 5, i32 1, i32 3, i32 2, i32 5, i32 1, i32 3, i32 2, i32 5> 256 ret <64 x i8> %shuffle 257} 258 259define <64 x i8> @insert_dup_mem_v64i8_i32(ptr %ptr) { 260; AVX512F-LABEL: insert_dup_mem_v64i8_i32: 261; AVX512F: # %bb.0: 262; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm0 263; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 264; AVX512F-NEXT: retq 265; 266; AVX512BW-LABEL: insert_dup_mem_v64i8_i32: 267; AVX512BW: # %bb.0: 268; AVX512BW-NEXT: vpbroadcastb (%rdi), %zmm0 269; AVX512BW-NEXT: retq 270; 271; AVX512DQ-LABEL: insert_dup_mem_v64i8_i32: 272; AVX512DQ: # %bb.0: 273; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm0 274; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 275; AVX512DQ-NEXT: retq 276; 277; AVX512VBMI-LABEL: insert_dup_mem_v64i8_i32: 278; AVX512VBMI: # %bb.0: 279; AVX512VBMI-NEXT: vpbroadcastb (%rdi), %zmm0 280; AVX512VBMI-NEXT: retq 281 %tmp = load i32, ptr %ptr, align 4 282 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0 283 %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8> 284 %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> poison, <64 x i32> zeroinitializer 285 ret <64 x i8> %tmp3 286} 287 288define <64 x i8> @insert_dup_mem_v64i8_sext_i8(ptr %ptr) { 289; AVX512F-LABEL: insert_dup_mem_v64i8_sext_i8: 290; AVX512F: # %bb.0: 291; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm0 292; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 293; AVX512F-NEXT: retq 294; 295; AVX512BW-LABEL: insert_dup_mem_v64i8_sext_i8: 296; AVX512BW: # %bb.0: 297; AVX512BW-NEXT: vpbroadcastb (%rdi), %zmm0 298; AVX512BW-NEXT: retq 299; 300; AVX512DQ-LABEL: insert_dup_mem_v64i8_sext_i8: 301; AVX512DQ: # %bb.0: 302; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm0 303; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 304; AVX512DQ-NEXT: retq 305; 306; AVX512VBMI-LABEL: insert_dup_mem_v64i8_sext_i8: 307; AVX512VBMI: # %bb.0: 308; AVX512VBMI-NEXT: vpbroadcastb (%rdi), %zmm0 309; AVX512VBMI-NEXT: retq 310 %tmp = load i8, ptr %ptr, align 1 311 %tmp1 = sext i8 %tmp to i32 312 %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0 313 %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8> 314 %tmp4 = shufflevector <16 x i8> %tmp3, <16 x i8> poison, <64 x i32> zeroinitializer 315 ret <64 x i8> %tmp4 316} 317 318define <64 x i8> @insert_dup_elt1_mem_v64i8_i32(ptr %ptr) { 319; AVX512F-LABEL: insert_dup_elt1_mem_v64i8_i32: 320; AVX512F: # %bb.0: 321; AVX512F-NEXT: vpbroadcastb 1(%rdi), %ymm0 322; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 323; AVX512F-NEXT: retq 324; 325; AVX512BW-LABEL: insert_dup_elt1_mem_v64i8_i32: 326; AVX512BW: # %bb.0: 327; AVX512BW-NEXT: vpbroadcastb 1(%rdi), %zmm0 328; AVX512BW-NEXT: retq 329; 330; AVX512DQ-LABEL: insert_dup_elt1_mem_v64i8_i32: 331; AVX512DQ: # %bb.0: 332; AVX512DQ-NEXT: vpbroadcastb 1(%rdi), %ymm0 333; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 334; AVX512DQ-NEXT: retq 335; 336; AVX512VBMI-LABEL: insert_dup_elt1_mem_v64i8_i32: 337; AVX512VBMI: # %bb.0: 338; AVX512VBMI-NEXT: vpbroadcastb 1(%rdi), %zmm0 339; AVX512VBMI-NEXT: retq 340 %tmp = load i32, ptr %ptr, align 4 341 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0 342 %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8> 343 %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> poison, <64 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 344 ret <64 x i8> %tmp3 345} 346 347define <64 x i8> @insert_dup_elt3_mem_v64i8_i32(ptr %ptr) { 348; AVX512F-LABEL: insert_dup_elt3_mem_v64i8_i32: 349; AVX512F: # %bb.0: 350; AVX512F-NEXT: vpbroadcastb 3(%rdi), %ymm0 351; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 352; AVX512F-NEXT: retq 353; 354; AVX512BW-LABEL: insert_dup_elt3_mem_v64i8_i32: 355; AVX512BW: # %bb.0: 356; AVX512BW-NEXT: vpbroadcastb 3(%rdi), %zmm0 357; AVX512BW-NEXT: retq 358; 359; AVX512DQ-LABEL: insert_dup_elt3_mem_v64i8_i32: 360; AVX512DQ: # %bb.0: 361; AVX512DQ-NEXT: vpbroadcastb 3(%rdi), %ymm0 362; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 363; AVX512DQ-NEXT: retq 364; 365; AVX512VBMI-LABEL: insert_dup_elt3_mem_v64i8_i32: 366; AVX512VBMI: # %bb.0: 367; AVX512VBMI-NEXT: vpbroadcastb 3(%rdi), %zmm0 368; AVX512VBMI-NEXT: retq 369 %tmp = load i32, ptr %ptr, align 4 370 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0 371 %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8> 372 %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> poison, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 373 ret <64 x i8> %tmp3 374} 375 376define <64 x i8> @insert_dup_elt1_mem_v64i8_sext_i8(ptr %ptr) { 377; AVX512F-LABEL: insert_dup_elt1_mem_v64i8_sext_i8: 378; AVX512F: # %bb.0: 379; AVX512F-NEXT: movsbl (%rdi), %eax 380; AVX512F-NEXT: shrl $8, %eax 381; AVX512F-NEXT: vmovd %eax, %xmm0 382; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0 383; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 384; AVX512F-NEXT: retq 385; 386; AVX512BW-LABEL: insert_dup_elt1_mem_v64i8_sext_i8: 387; AVX512BW: # %bb.0: 388; AVX512BW-NEXT: movsbl (%rdi), %eax 389; AVX512BW-NEXT: shrl $8, %eax 390; AVX512BW-NEXT: vpbroadcastb %eax, %zmm0 391; AVX512BW-NEXT: retq 392; 393; AVX512DQ-LABEL: insert_dup_elt1_mem_v64i8_sext_i8: 394; AVX512DQ: # %bb.0: 395; AVX512DQ-NEXT: movsbl (%rdi), %eax 396; AVX512DQ-NEXT: shrl $8, %eax 397; AVX512DQ-NEXT: vmovd %eax, %xmm0 398; AVX512DQ-NEXT: vpbroadcastb %xmm0, %ymm0 399; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 400; AVX512DQ-NEXT: retq 401; 402; AVX512VBMI-LABEL: insert_dup_elt1_mem_v64i8_sext_i8: 403; AVX512VBMI: # %bb.0: 404; AVX512VBMI-NEXT: movsbl (%rdi), %eax 405; AVX512VBMI-NEXT: shrl $8, %eax 406; AVX512VBMI-NEXT: vpbroadcastb %eax, %zmm0 407; AVX512VBMI-NEXT: retq 408 %tmp = load i8, ptr %ptr, align 1 409 %tmp1 = sext i8 %tmp to i32 410 %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0 411 %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8> 412 %tmp4 = shufflevector <16 x i8> %tmp3, <16 x i8> poison, <64 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 413 ret <64 x i8> %tmp4 414} 415 416define <64 x i8> @shuffle_v64i8_64_zz_zz_zz_zz_zz_zz_zz_65_zz_zz_zz_zz_zz_zz_zz_66_zz_zz_zz_zz_zz_zz_zz_67_zz_zz_zz_zz_zz_zz_zz_68_zz_zz_zz_zz_zz_zz_zz_69_zz_zz_zz_zz_zz_zz_zz_70_zz_zz_zz_zz_zz_zz_zz_71_zz_zz_zz_zz_zz_zz_zz(<64 x i8> %a) { 417; ALL-LABEL: shuffle_v64i8_64_zz_zz_zz_zz_zz_zz_zz_65_zz_zz_zz_zz_zz_zz_zz_66_zz_zz_zz_zz_zz_zz_zz_67_zz_zz_zz_zz_zz_zz_zz_68_zz_zz_zz_zz_zz_zz_zz_69_zz_zz_zz_zz_zz_zz_zz_70_zz_zz_zz_zz_zz_zz_zz_71_zz_zz_zz_zz_zz_zz_zz: 418; ALL: # %bb.0: 419; ALL-NEXT: vpmovzxbq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero 420; ALL-NEXT: retq 421 %shuffle = shufflevector <64 x i8> zeroinitializer, <64 x i8> %a, <64 x i32> <i32 64, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 65, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 66, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 67, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 68, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 69, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 70, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 71, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 422 ret <64 x i8> %shuffle 423} 424 425define <64 x i8> @shuffle_v64i8_64_zz_zz_zz_65_zz_zz_zz_66_zz_zz_zz_67_zz_zz_zz_68_zz_zz_zz_69_zz_zz_zz_70_zz_zz_zz_71_zz_zz_zz_72_zz_zz_zz_73_zz_zz_zz_74_zz_zz_zz_75_zz_zz_zz_76_zz_zz_zz_77_zz_zz_zz_78_zz_zz_zz_79_zz_zz_zz(<64 x i8> %a) { 426; ALL-LABEL: shuffle_v64i8_64_zz_zz_zz_65_zz_zz_zz_66_zz_zz_zz_67_zz_zz_zz_68_zz_zz_zz_69_zz_zz_zz_70_zz_zz_zz_71_zz_zz_zz_72_zz_zz_zz_73_zz_zz_zz_74_zz_zz_zz_75_zz_zz_zz_76_zz_zz_zz_77_zz_zz_zz_78_zz_zz_zz_79_zz_zz_zz: 427; ALL: # %bb.0: 428; ALL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 429; ALL-NEXT: retq 430 %shuffle = shufflevector <64 x i8> zeroinitializer, <64 x i8> %a, <64 x i32> <i32 64, i32 0, i32 0, i32 0, i32 65, i32 0, i32 0, i32 0, i32 66, i32 0, i32 0, i32 0, i32 67, i32 0, i32 0, i32 0, i32 68, i32 0, i32 0, i32 0, i32 69, i32 0, i32 0, i32 0, i32 70, i32 0, i32 0, i32 0, i32 71, i32 0, i32 0, i32 0, i32 72, i32 0, i32 0, i32 0, i32 73, i32 0, i32 0, i32 0, i32 74, i32 0, i32 0, i32 0, i32 75, i32 0, i32 0, i32 0, i32 76, i32 0, i32 0, i32 0, i32 77, i32 0, i32 0, i32 0, i32 78, i32 0, i32 0, i32 0, i32 79, i32 0, i32 0, i32 0> 431 ret <64 x i8> %shuffle 432} 433 434define <64 x i8> @shuffle_v64i8_64_zz_65_zz_66_zz_67_zz_68_zz_69_zz_70_zz_71_zz_72_zz_73_zz_74_zz_75_zz_76_zz_77_zz_78_zz_79_zz_80_zz_81_zz_82_zz_83_zz_84_zz_85_zz_86_zz_87_zz_88_zz_89_zz_90_zz_91_zz_92_zz_93_zz_94_zz_95_zz(<64 x i8> %a) { 435; AVX512F-LABEL: shuffle_v64i8_64_zz_65_zz_66_zz_67_zz_68_zz_69_zz_70_zz_71_zz_72_zz_73_zz_74_zz_75_zz_76_zz_77_zz_78_zz_79_zz_80_zz_81_zz_82_zz_83_zz_84_zz_85_zz_86_zz_87_zz_88_zz_89_zz_90_zz_91_zz_92_zz_93_zz_94_zz_95_zz: 436; AVX512F: # %bb.0: 437; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 438; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 439; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 440; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 441; AVX512F-NEXT: retq 442; 443; AVX512BW-LABEL: shuffle_v64i8_64_zz_65_zz_66_zz_67_zz_68_zz_69_zz_70_zz_71_zz_72_zz_73_zz_74_zz_75_zz_76_zz_77_zz_78_zz_79_zz_80_zz_81_zz_82_zz_83_zz_84_zz_85_zz_86_zz_87_zz_88_zz_89_zz_90_zz_91_zz_92_zz_93_zz_94_zz_95_zz: 444; AVX512BW: # %bb.0: 445; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 446; AVX512BW-NEXT: retq 447; 448; AVX512DQ-LABEL: shuffle_v64i8_64_zz_65_zz_66_zz_67_zz_68_zz_69_zz_70_zz_71_zz_72_zz_73_zz_74_zz_75_zz_76_zz_77_zz_78_zz_79_zz_80_zz_81_zz_82_zz_83_zz_84_zz_85_zz_86_zz_87_zz_88_zz_89_zz_90_zz_91_zz_92_zz_93_zz_94_zz_95_zz: 449; AVX512DQ: # %bb.0: 450; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 451; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm0 452; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 453; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 454; AVX512DQ-NEXT: retq 455; 456; AVX512VBMI-LABEL: shuffle_v64i8_64_zz_65_zz_66_zz_67_zz_68_zz_69_zz_70_zz_71_zz_72_zz_73_zz_74_zz_75_zz_76_zz_77_zz_78_zz_79_zz_80_zz_81_zz_82_zz_83_zz_84_zz_85_zz_86_zz_87_zz_88_zz_89_zz_90_zz_91_zz_92_zz_93_zz_94_zz_95_zz: 457; AVX512VBMI: # %bb.0: 458; AVX512VBMI-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 459; AVX512VBMI-NEXT: retq 460 %shuffle = shufflevector <64 x i8> zeroinitializer, <64 x i8> %a, <64 x i32> <i32 64, i32 0, i32 65, i32 0, i32 66, i32 0, i32 67, i32 0, i32 68, i32 0, i32 69, i32 0, i32 70, i32 0, i32 71, i32 0, i32 72, i32 0, i32 73, i32 0, i32 74, i32 0, i32 75, i32 0, i32 76, i32 0, i32 77, i32 0, i32 78, i32 0, i32 79, i32 0, i32 80, i32 0, i32 81, i32 0, i32 82, i32 0, i32 83, i32 0, i32 84, i32 0, i32 85, i32 0, i32 86, i32 0, i32 87, i32 0, i32 88, i32 0, i32 89, i32 0, i32 90, i32 0, i32 91, i32 0, i32 92, i32 0, i32 93, i32 0, i32 94, i32 0, i32 95, i32 0> 461 ret <64 x i8> %shuffle 462} 463 464define <64 x i8> @shuffle_v64i8_63_zz_61_zz_59_zz_57_zz_55_zz_53_zz_51_zz_49_zz_47_zz_45_zz_43_zz_41_zz_39_zz_37_zz_35_zz_33_zz_31_zz_29_zz_27_zz_25_zz_23_zz_21_zz_19_zz_17_zz_15_zz_13_zz_11_zz_9_zz_7_zz_5_zz_3_zz_1_zz(<64 x i8> %a) { 465; AVX512F-LABEL: shuffle_v64i8_63_zz_61_zz_59_zz_57_zz_55_zz_53_zz_51_zz_49_zz_47_zz_45_zz_43_zz_41_zz_39_zz_37_zz_35_zz_33_zz_31_zz_29_zz_27_zz_25_zz_23_zz_21_zz_19_zz_17_zz_15_zz_13_zz_11_zz_9_zz_7_zz_5_zz_3_zz_1_zz: 466; AVX512F: # %bb.0: 467; AVX512F-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] 468; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [15,128,13,128,11,128,9,128,7,128,5,128,3,128,1,128,15,128,13,128,11,128,9,128,7,128,5,128,3,128,1,128] 469; AVX512F-NEXT: # ymm2 = mem[0,1,0,1] 470; AVX512F-NEXT: vpshufb %ymm2, %ymm1, %ymm1 471; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 472; AVX512F-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] 473; AVX512F-NEXT: vpshufb %ymm2, %ymm0, %ymm0 474; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 475; AVX512F-NEXT: retq 476; 477; AVX512BW-LABEL: shuffle_v64i8_63_zz_61_zz_59_zz_57_zz_55_zz_53_zz_51_zz_49_zz_47_zz_45_zz_43_zz_41_zz_39_zz_37_zz_35_zz_33_zz_31_zz_29_zz_27_zz_25_zz_23_zz_21_zz_19_zz_17_zz_15_zz_13_zz_11_zz_9_zz_7_zz_5_zz_3_zz_1_zz: 478; AVX512BW: # %bb.0: 479; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[6,7,4,5,2,3,0,1] 480; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[15],zero,zmm0[13],zero,zmm0[11],zero,zmm0[9],zero,zmm0[7],zero,zmm0[5],zero,zmm0[3],zero,zmm0[1],zero,zmm0[31],zero,zmm0[29],zero,zmm0[27],zero,zmm0[25],zero,zmm0[23],zero,zmm0[21],zero,zmm0[19],zero,zmm0[17],zero,zmm0[47],zero,zmm0[45],zero,zmm0[43],zero,zmm0[41],zero,zmm0[39],zero,zmm0[37],zero,zmm0[35],zero,zmm0[33],zero,zmm0[63],zero,zmm0[61],zero,zmm0[59],zero,zmm0[57],zero,zmm0[55],zero,zmm0[53],zero,zmm0[51],zero,zmm0[49],zero 481; AVX512BW-NEXT: retq 482; 483; AVX512DQ-LABEL: shuffle_v64i8_63_zz_61_zz_59_zz_57_zz_55_zz_53_zz_51_zz_49_zz_47_zz_45_zz_43_zz_41_zz_39_zz_37_zz_35_zz_33_zz_31_zz_29_zz_27_zz_25_zz_23_zz_21_zz_19_zz_17_zz_15_zz_13_zz_11_zz_9_zz_7_zz_5_zz_3_zz_1_zz: 484; AVX512DQ: # %bb.0: 485; AVX512DQ-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] 486; AVX512DQ-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [15,128,13,128,11,128,9,128,7,128,5,128,3,128,1,128,15,128,13,128,11,128,9,128,7,128,5,128,3,128,1,128] 487; AVX512DQ-NEXT: # ymm2 = mem[0,1,0,1] 488; AVX512DQ-NEXT: vpshufb %ymm2, %ymm1, %ymm1 489; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm0 490; AVX512DQ-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] 491; AVX512DQ-NEXT: vpshufb %ymm2, %ymm0, %ymm0 492; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 493; AVX512DQ-NEXT: retq 494; 495; AVX512VBMI-LABEL: shuffle_v64i8_63_zz_61_zz_59_zz_57_zz_55_zz_53_zz_51_zz_49_zz_47_zz_45_zz_43_zz_41_zz_39_zz_37_zz_35_zz_33_zz_31_zz_29_zz_27_zz_25_zz_23_zz_21_zz_19_zz_17_zz_15_zz_13_zz_11_zz_9_zz_7_zz_5_zz_3_zz_1_zz: 496; AVX512VBMI: # %bb.0: 497; AVX512VBMI-NEXT: vpxor %xmm1, %xmm1, %xmm1 498; AVX512VBMI-NEXT: vmovdqa64 {{.*#+}} zmm2 = [63,65,61,67,59,69,57,71,55,73,53,75,51,77,49,79,47,81,45,83,43,85,41,87,39,89,37,91,35,93,33,95,31,97,29,99,27,101,25,103,23,105,21,107,19,109,17,111,15,113,13,115,11,117,9,119,7,121,5,123,3,125,1,127] 499; AVX512VBMI-NEXT: vpermt2b %zmm1, %zmm2, %zmm0 500; AVX512VBMI-NEXT: retq 501 %shuffle = shufflevector <64 x i8> %a, <64 x i8> zeroinitializer, <64 x i32> <i32 63, i32 64, i32 61, i32 64, i32 59, i32 64, i32 57, i32 64, i32 55, i32 64, i32 53, i32 64, i32 51, i32 64, i32 49, i32 64, i32 47, i32 64, i32 45, i32 64, i32 43, i32 64, i32 41, i32 64, i32 39, i32 64, i32 37, i32 64, i32 35, i32 64, i32 33, i32 64, i32 31, i32 64, i32 29, i32 64, i32 27, i32 64, i32 25, i32 64, i32 23, i32 64, i32 21, i32 64, i32 19, i32 64, i32 17, i32 64, i32 15, i32 64, i32 13, i32 64, i32 11, i32 64, i32 9, i32 64, i32 7, i32 64, i32 5, i32 64, i32 3, i32 64, i32 1, i32 64> 502 ret <64 x i8> %shuffle 503} 504 505define <64 x i8> @shuffle_v64i8_63_64_61_66_59_68_57_70_55_72_53_74_51_76_49_78_47_80_45_82_43_84_41_86_39_88_37_90_35_92_33_94_31_96_29_98_27_100_25_102_23_104_21_106_19_108_17_110_15_112_13_114_11_116_9_118_7_120_5_122_3_124_1_126(<64 x i8> %a, <64 x i8> %b) { 506; AVX512F-LABEL: shuffle_v64i8_63_64_61_66_59_68_57_70_55_72_53_74_51_76_49_78_47_80_45_82_43_84_41_86_39_88_37_90_35_92_33_94_31_96_29_98_27_100_25_102_23_104_21_106_19_108_17_110_15_112_13_114_11_116_9_118_7_120_5_122_3_124_1_126: 507; AVX512F: # %bb.0: 508; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2 509; AVX512F-NEXT: vpermq {{.*#+}} ymm3 = ymm0[2,3,0,1] 510; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] 511; AVX512F-NEXT: vpblendvb %ymm4, %ymm2, %ymm3, %ymm2 512; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [15,0,13,2,11,4,9,6,7,8,5,10,3,12,1,14,15,0,13,2,11,4,9,6,7,8,5,10,3,12,1,14] 513; AVX512F-NEXT: # ymm3 = mem[0,1,0,1] 514; AVX512F-NEXT: vpshufb %ymm3, %ymm2, %ymm2 515; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 516; AVX512F-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] 517; AVX512F-NEXT: vpblendvb %ymm4, %ymm1, %ymm0, %ymm0 518; AVX512F-NEXT: vpshufb %ymm3, %ymm0, %ymm0 519; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 520; AVX512F-NEXT: retq 521; 522; AVX512BW-LABEL: shuffle_v64i8_63_64_61_66_59_68_57_70_55_72_53_74_51_76_49_78_47_80_45_82_43_84_41_86_39_88_37_90_35_92_33_94_31_96_29_98_27_100_25_102_23_104_21_106_19_108_17_110_15_112_13_114_11_116_9_118_7_120_5_122_3_124_1_126: 523; AVX512BW: # %bb.0: 524; AVX512BW-NEXT: vpsllw $8, %zmm1, %zmm1 525; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[6,7,4,5,2,3,0,1] 526; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[15],zero,zmm0[13],zero,zmm0[11],zero,zmm0[9],zero,zmm0[7],zero,zmm0[5],zero,zmm0[3],zero,zmm0[1],zero,zmm0[31],zero,zmm0[29],zero,zmm0[27],zero,zmm0[25],zero,zmm0[23],zero,zmm0[21],zero,zmm0[19],zero,zmm0[17],zero,zmm0[47],zero,zmm0[45],zero,zmm0[43],zero,zmm0[41],zero,zmm0[39],zero,zmm0[37],zero,zmm0[35],zero,zmm0[33],zero,zmm0[63],zero,zmm0[61],zero,zmm0[59],zero,zmm0[57],zero,zmm0[55],zero,zmm0[53],zero,zmm0[51],zero,zmm0[49],zero 527; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 528; AVX512BW-NEXT: retq 529; 530; AVX512DQ-LABEL: shuffle_v64i8_63_64_61_66_59_68_57_70_55_72_53_74_51_76_49_78_47_80_45_82_43_84_41_86_39_88_37_90_35_92_33_94_31_96_29_98_27_100_25_102_23_104_21_106_19_108_17_110_15_112_13_114_11_116_9_118_7_120_5_122_3_124_1_126: 531; AVX512DQ: # %bb.0: 532; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm2 533; AVX512DQ-NEXT: vpermq {{.*#+}} ymm3 = ymm0[2,3,0,1] 534; AVX512DQ-NEXT: vpbroadcastw {{.*#+}} ymm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] 535; AVX512DQ-NEXT: vpblendvb %ymm4, %ymm2, %ymm3, %ymm2 536; AVX512DQ-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [15,0,13,2,11,4,9,6,7,8,5,10,3,12,1,14,15,0,13,2,11,4,9,6,7,8,5,10,3,12,1,14] 537; AVX512DQ-NEXT: # ymm3 = mem[0,1,0,1] 538; AVX512DQ-NEXT: vpshufb %ymm3, %ymm2, %ymm2 539; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm0 540; AVX512DQ-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] 541; AVX512DQ-NEXT: vpblendvb %ymm4, %ymm1, %ymm0, %ymm0 542; AVX512DQ-NEXT: vpshufb %ymm3, %ymm0, %ymm0 543; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 544; AVX512DQ-NEXT: retq 545; 546; AVX512VBMI-LABEL: shuffle_v64i8_63_64_61_66_59_68_57_70_55_72_53_74_51_76_49_78_47_80_45_82_43_84_41_86_39_88_37_90_35_92_33_94_31_96_29_98_27_100_25_102_23_104_21_106_19_108_17_110_15_112_13_114_11_116_9_118_7_120_5_122_3_124_1_126: 547; AVX512VBMI: # %bb.0: 548; AVX512VBMI-NEXT: vmovdqa64 {{.*#+}} zmm2 = [63,64,61,66,59,68,57,70,55,72,53,74,51,76,49,78,47,80,45,82,43,84,41,86,39,88,37,90,35,92,33,94,31,96,29,98,27,100,25,102,23,104,21,106,19,108,17,110,15,112,13,114,11,116,9,118,7,120,5,122,3,124,1,126] 549; AVX512VBMI-NEXT: vpermt2b %zmm1, %zmm2, %zmm0 550; AVX512VBMI-NEXT: retq 551 %shuffle = shufflevector <64 x i8> %a, <64 x i8> %b, <64 x i32> <i32 63, i32 64, i32 61, i32 66, i32 59, i32 68, i32 57, i32 70, i32 55, i32 72, i32 53, i32 74, i32 51, i32 76, i32 49, i32 78, i32 47, i32 80, i32 45, i32 82, i32 43, i32 84, i32 41, i32 86, i32 39, i32 88, i32 37, i32 90, i32 35, i32 92, i32 33, i32 94, i32 31, i32 96, i32 29, i32 98, i32 27, i32 100, i32 25, i32 102, i32 23, i32 104, i32 21, i32 106, i32 19, i32 108, i32 17, i32 110, i32 15, i32 112, i32 13, i32 114, i32 11, i32 116, i32 9, i32 118, i32 7, i32 120, i32 5, i32 122, i32 3, i32 124, i32 1, i32 126> 552 ret <64 x i8> %shuffle 553} 554 555define <64 x i8> @shuffle_v64i8_01_03_07_09_13_15_19_21_25_27_31_33_37_39_43_45_49_51_55_57_61_63_67_69_73_75_79_81_85_87_91_93_97_99_103_105_109_111_115_117_121_123_127_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u(<64 x i8> %a0, <64 x i8> %a1) { 556; AVX512F-LABEL: shuffle_v64i8_01_03_07_09_13_15_19_21_25_27_31_33_37_39_43_45_49_51_55_57_61_63_67_69_73_75_79_81_85_87_91_93_97_99_103_105_109_111_115_117_121_123_127_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u: 557; AVX512F: # %bb.0: 558; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm2 559; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [128,128,128,128,128,128,3,5,9,11,15,u,u,u,u,u] 560; AVX512F-NEXT: vpshufb %xmm3, %xmm2, %xmm2 561; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = [1,3,7,9,13,15,128,128,128,128,128,u,u,u,u,u] 562; AVX512F-NEXT: vpshufb %xmm4, %xmm0, %xmm5 563; AVX512F-NEXT: vpor %xmm2, %xmm5, %xmm2 564; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 565; AVX512F-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,1,5,7,11,13,17,19,23,25,29,31,u,u,u,u,u,u,u,u,u,u] 566; AVX512F-NEXT: vpmovsxdq {{.*#+}} ymm5 = [18446744073709551615,16777215,0,0] 567; AVX512F-NEXT: vpblendvb %ymm5, %ymm2, %ymm0, %ymm0 568; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 569; AVX512F-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,u,u],zero,zero,zero,zero,zero,xmm2[1,5,7,11,13] 570; AVX512F-NEXT: vpshufb {{.*#+}} xmm5 = xmm1[u,u,u,u,u,u,3,5,9,11,15],zero,zero,zero,zero,zero 571; AVX512F-NEXT: vpor %xmm2, %xmm5, %xmm2 572; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 573; AVX512F-NEXT: vpblendw {{.*#+}} ymm2 = ymm0[0,1,2],ymm2[3,4,5,6,7],ymm0[8,9,10],ymm2[11,12,13,14,15] 574; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5,6,7] 575; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1 576; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 577; AVX512F-NEXT: vpshufb %xmm3, %xmm2, %xmm2 578; AVX512F-NEXT: vpshufb %xmm4, %xmm1, %xmm1 579; AVX512F-NEXT: vpor %xmm2, %xmm1, %xmm1 580; AVX512F-NEXT: vinserti32x4 $2, %xmm1, %zmm0, %zmm0 581; AVX512F-NEXT: retq 582; 583; AVX512BW-LABEL: shuffle_v64i8_01_03_07_09_13_15_19_21_25_27_31_33_37_39_43_45_49_51_55_57_61_63_67_69_73_75_79_81_85_87_91_93_97_99_103_105_109_111_115_117_121_123_127_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u: 584; AVX512BW: # %bb.0: 585; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm2 586; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [128,128,128,128,128,128,3,5,9,11,15,u,u,u,u,u] 587; AVX512BW-NEXT: vpshufb %xmm3, %xmm2, %xmm2 588; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm4 = [1,3,7,9,13,15,128,128,128,128,128,u,u,u,u,u] 589; AVX512BW-NEXT: vpshufb %xmm4, %xmm0, %xmm5 590; AVX512BW-NEXT: vpor %xmm2, %xmm5, %xmm2 591; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm0 592; AVX512BW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,1,5,7,11,13,17,19,23,25,29,31,u,u,u,u,u,u,u,u,u,u] 593; AVX512BW-NEXT: vpmovsxdq {{.*#+}} ymm5 = [18446744073709551615,16777215,0,0] 594; AVX512BW-NEXT: vpblendvb %ymm5, %ymm2, %ymm0, %ymm0 595; AVX512BW-NEXT: vextracti128 $1, %ymm1, %xmm2 596; AVX512BW-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,u,u],zero,zero,zero,zero,zero,xmm2[1,5,7,11,13] 597; AVX512BW-NEXT: vpshufb {{.*#+}} xmm5 = xmm1[u,u,u,u,u,u,3,5,9,11,15],zero,zero,zero,zero,zero 598; AVX512BW-NEXT: vpor %xmm2, %xmm5, %xmm2 599; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 600; AVX512BW-NEXT: vpblendw {{.*#+}} ymm2 = ymm0[0,1,2],ymm2[3,4,5,6,7],ymm0[8,9,10],ymm2[11,12,13,14,15] 601; AVX512BW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5,6,7] 602; AVX512BW-NEXT: vextracti64x4 $1, %zmm1, %ymm1 603; AVX512BW-NEXT: vextracti128 $1, %ymm1, %xmm2 604; AVX512BW-NEXT: vpshufb %xmm3, %xmm2, %xmm2 605; AVX512BW-NEXT: vpshufb %xmm4, %xmm1, %xmm1 606; AVX512BW-NEXT: vpor %xmm2, %xmm1, %xmm1 607; AVX512BW-NEXT: vinserti32x4 $2, %xmm1, %zmm0, %zmm0 608; AVX512BW-NEXT: retq 609; 610; AVX512DQ-LABEL: shuffle_v64i8_01_03_07_09_13_15_19_21_25_27_31_33_37_39_43_45_49_51_55_57_61_63_67_69_73_75_79_81_85_87_91_93_97_99_103_105_109_111_115_117_121_123_127_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u: 611; AVX512DQ: # %bb.0: 612; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm2 613; AVX512DQ-NEXT: vmovdqa {{.*#+}} xmm3 = [128,128,128,128,128,128,3,5,9,11,15,u,u,u,u,u] 614; AVX512DQ-NEXT: vpshufb %xmm3, %xmm2, %xmm2 615; AVX512DQ-NEXT: vmovdqa {{.*#+}} xmm4 = [1,3,7,9,13,15,128,128,128,128,128,u,u,u,u,u] 616; AVX512DQ-NEXT: vpshufb %xmm4, %xmm0, %xmm5 617; AVX512DQ-NEXT: vpor %xmm2, %xmm5, %xmm2 618; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm0 619; AVX512DQ-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,1,5,7,11,13,17,19,23,25,29,31,u,u,u,u,u,u,u,u,u,u] 620; AVX512DQ-NEXT: vpmovsxdq {{.*#+}} ymm5 = [18446744073709551615,16777215,0,0] 621; AVX512DQ-NEXT: vpblendvb %ymm5, %ymm2, %ymm0, %ymm0 622; AVX512DQ-NEXT: vextracti128 $1, %ymm1, %xmm2 623; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,u,u],zero,zero,zero,zero,zero,xmm2[1,5,7,11,13] 624; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm5 = xmm1[u,u,u,u,u,u,3,5,9,11,15],zero,zero,zero,zero,zero 625; AVX512DQ-NEXT: vpor %xmm2, %xmm5, %xmm2 626; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 627; AVX512DQ-NEXT: vpblendw {{.*#+}} ymm2 = ymm0[0,1,2],ymm2[3,4,5,6,7],ymm0[8,9,10],ymm2[11,12,13,14,15] 628; AVX512DQ-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5,6,7] 629; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm1 630; AVX512DQ-NEXT: vextracti128 $1, %ymm1, %xmm2 631; AVX512DQ-NEXT: vpshufb %xmm3, %xmm2, %xmm2 632; AVX512DQ-NEXT: vpshufb %xmm4, %xmm1, %xmm1 633; AVX512DQ-NEXT: vpor %xmm2, %xmm1, %xmm1 634; AVX512DQ-NEXT: vinserti32x4 $2, %xmm1, %zmm0, %zmm0 635; AVX512DQ-NEXT: retq 636; 637; AVX512VBMI-LABEL: shuffle_v64i8_01_03_07_09_13_15_19_21_25_27_31_33_37_39_43_45_49_51_55_57_61_63_67_69_73_75_79_81_85_87_91_93_97_99_103_105_109_111_115_117_121_123_127_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u: 638; AVX512VBMI: # %bb.0: 639; AVX512VBMI-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,3,7,9,13,15,19,21,25,27,31,33,37,39,43,45,49,51,55,57,61,63,67,69,73,75,79,81,85,87,91,93,97,99,103,105,109,111,115,117,121,123,127,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u] 640; AVX512VBMI-NEXT: vpermt2b %zmm1, %zmm2, %zmm0 641; AVX512VBMI-NEXT: retq 642 %r = shufflevector <64 x i8> %a0, <64 x i8> %a1, <64 x i32> <i32 1, i32 3, i32 7, i32 9, i32 13, i32 15, i32 19, i32 21, i32 25, i32 27, i32 31, i32 33, i32 37, i32 39, i32 43, i32 45, i32 49, i32 51, i32 55, i32 57, i32 61, i32 63, i32 67, i32 69, i32 73, i32 75, i32 79, i32 81, i32 85, i32 87, i32 91, i32 93, i32 97, i32 99, i32 103, i32 105, i32 109, i32 111, i32 115, i32 117, i32 121, i32 123, i32 127, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 643 ret <64 x i8> %r 644} 645 646define <64 x i8> @shuffle_v64i8_01_05_07_11_13_17_19_23_25_29_31_35_37_41_43_47_49_53_55_59_61_65_67_71_73_77_79_83_85_89_91_95_97_101_103_107_109_113_115_119_121_125_127_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u(<64 x i8> %a0, <64 x i8> %a1) { 647; AVX512F-LABEL: shuffle_v64i8_01_05_07_11_13_17_19_23_25_29_31_35_37_41_43_47_49_53_55_59_61_65_67_71_73_77_79_83_85_89_91_95_97_101_103_107_109_113_115_119_121_125_127_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u: 648; AVX512F: # %bb.0: 649; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2 650; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [1,5,7,11,13,128,128,128,128,128,128,u,u,u,u,u] 651; AVX512F-NEXT: vpshufb %xmm3, %xmm2, %xmm4 652; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm2 653; AVX512F-NEXT: vmovdqa {{.*#+}} xmm5 = [128,128,128,128,128,1,3,7,9,13,15,u,u,u,u,u] 654; AVX512F-NEXT: vpshufb %xmm5, %xmm2, %xmm2 655; AVX512F-NEXT: vpor %xmm4, %xmm2, %xmm2 656; AVX512F-NEXT: vpshufb %xmm3, %xmm0, %xmm3 657; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm4 658; AVX512F-NEXT: vpshufb %xmm5, %xmm4, %xmm4 659; AVX512F-NEXT: vpor %xmm3, %xmm4, %xmm3 660; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 661; AVX512F-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,3,5,9,11,15,17,21,23,27,29,u,u,u,u,u,u,u,u,u,u,u] 662; AVX512F-NEXT: vpmovsxdq {{.*#+}} ymm4 = [18446744073709551615,16777215,0,0] 663; AVX512F-NEXT: vpblendvb %ymm4, %ymm3, %ymm0, %ymm0 664; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3 665; AVX512F-NEXT: vpshufb {{.*#+}} xmm3 = xmm3[u,u,u,u,u],zero,zero,zero,zero,zero,zero,xmm3[3,5,9,11,15] 666; AVX512F-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,1,3,7,9,13,15],zero,zero,zero,zero,zero 667; AVX512F-NEXT: vpor %xmm3, %xmm1, %xmm1 668; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 669; AVX512F-NEXT: vpmovsxwd {{.*#+}} ymm3 = [4294967295,4294967295,4294967295,4294967295,4294967295,255,0,0] 670; AVX512F-NEXT: vpblendvb %ymm3, %ymm0, %ymm1, %ymm0 671; AVX512F-NEXT: vinserti32x4 $2, %xmm2, %zmm0, %zmm0 672; AVX512F-NEXT: retq 673; 674; AVX512BW-LABEL: shuffle_v64i8_01_05_07_11_13_17_19_23_25_29_31_35_37_41_43_47_49_53_55_59_61_65_67_71_73_77_79_83_85_89_91_95_97_101_103_107_109_113_115_119_121_125_127_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u: 675; AVX512BW: # %bb.0: 676; AVX512BW-NEXT: vextracti64x4 $1, %zmm1, %ymm2 677; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [1,5,7,11,13,128,128,128,128,128,128,u,u,u,u,u] 678; AVX512BW-NEXT: vpshufb %xmm3, %xmm2, %xmm4 679; AVX512BW-NEXT: vextracti128 $1, %ymm2, %xmm2 680; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm5 = [128,128,128,128,128,1,3,7,9,13,15,u,u,u,u,u] 681; AVX512BW-NEXT: vpshufb %xmm5, %xmm2, %xmm2 682; AVX512BW-NEXT: vpor %xmm4, %xmm2, %xmm2 683; AVX512BW-NEXT: vpshufb %xmm3, %xmm0, %xmm3 684; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm4 685; AVX512BW-NEXT: vpshufb %xmm5, %xmm4, %xmm4 686; AVX512BW-NEXT: vpor %xmm3, %xmm4, %xmm3 687; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm0 688; AVX512BW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,3,5,9,11,15,17,21,23,27,29,u,u,u,u,u,u,u,u,u,u,u] 689; AVX512BW-NEXT: vpmovsxdq {{.*#+}} ymm4 = [18446744073709551615,16777215,0,0] 690; AVX512BW-NEXT: vpblendvb %ymm4, %ymm3, %ymm0, %ymm0 691; AVX512BW-NEXT: vextracti128 $1, %ymm1, %xmm3 692; AVX512BW-NEXT: vpshufb {{.*#+}} xmm3 = xmm3[u,u,u,u,u],zero,zero,zero,zero,zero,zero,xmm3[3,5,9,11,15] 693; AVX512BW-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,1,3,7,9,13,15],zero,zero,zero,zero,zero 694; AVX512BW-NEXT: vpor %xmm3, %xmm1, %xmm1 695; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 696; AVX512BW-NEXT: vpmovsxwd {{.*#+}} ymm3 = [4294967295,4294967295,4294967295,4294967295,4294967295,255,0,0] 697; AVX512BW-NEXT: vpblendvb %ymm3, %ymm0, %ymm1, %ymm0 698; AVX512BW-NEXT: vinserti32x4 $2, %xmm2, %zmm0, %zmm0 699; AVX512BW-NEXT: retq 700; 701; AVX512DQ-LABEL: shuffle_v64i8_01_05_07_11_13_17_19_23_25_29_31_35_37_41_43_47_49_53_55_59_61_65_67_71_73_77_79_83_85_89_91_95_97_101_103_107_109_113_115_119_121_125_127_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u: 702; AVX512DQ: # %bb.0: 703; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm2 704; AVX512DQ-NEXT: vmovdqa {{.*#+}} xmm3 = [1,5,7,11,13,128,128,128,128,128,128,u,u,u,u,u] 705; AVX512DQ-NEXT: vpshufb %xmm3, %xmm2, %xmm4 706; AVX512DQ-NEXT: vextracti128 $1, %ymm2, %xmm2 707; AVX512DQ-NEXT: vmovdqa {{.*#+}} xmm5 = [128,128,128,128,128,1,3,7,9,13,15,u,u,u,u,u] 708; AVX512DQ-NEXT: vpshufb %xmm5, %xmm2, %xmm2 709; AVX512DQ-NEXT: vpor %xmm4, %xmm2, %xmm2 710; AVX512DQ-NEXT: vpshufb %xmm3, %xmm0, %xmm3 711; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm4 712; AVX512DQ-NEXT: vpshufb %xmm5, %xmm4, %xmm4 713; AVX512DQ-NEXT: vpor %xmm3, %xmm4, %xmm3 714; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm0 715; AVX512DQ-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,3,5,9,11,15,17,21,23,27,29,u,u,u,u,u,u,u,u,u,u,u] 716; AVX512DQ-NEXT: vpmovsxdq {{.*#+}} ymm4 = [18446744073709551615,16777215,0,0] 717; AVX512DQ-NEXT: vpblendvb %ymm4, %ymm3, %ymm0, %ymm0 718; AVX512DQ-NEXT: vextracti128 $1, %ymm1, %xmm3 719; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm3 = xmm3[u,u,u,u,u],zero,zero,zero,zero,zero,zero,xmm3[3,5,9,11,15] 720; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,1,3,7,9,13,15],zero,zero,zero,zero,zero 721; AVX512DQ-NEXT: vpor %xmm3, %xmm1, %xmm1 722; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 723; AVX512DQ-NEXT: vpmovsxwd {{.*#+}} ymm3 = [4294967295,4294967295,4294967295,4294967295,4294967295,255,0,0] 724; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm0, %ymm1, %ymm0 725; AVX512DQ-NEXT: vinserti32x4 $2, %xmm2, %zmm0, %zmm0 726; AVX512DQ-NEXT: retq 727; 728; AVX512VBMI-LABEL: shuffle_v64i8_01_05_07_11_13_17_19_23_25_29_31_35_37_41_43_47_49_53_55_59_61_65_67_71_73_77_79_83_85_89_91_95_97_101_103_107_109_113_115_119_121_125_127_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u: 729; AVX512VBMI: # %bb.0: 730; AVX512VBMI-NEXT: vmovdqa64 {{.*#+}} zmm2 = [65,69,71,75,77,81,83,87,89,93,95,99,101,105,107,111,113,117,119,123,125,1,3,7,9,13,15,19,21,25,27,31,33,37,39,43,45,49,51,55,57,61,63,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u] 731; AVX512VBMI-NEXT: vpermi2b %zmm0, %zmm1, %zmm2 732; AVX512VBMI-NEXT: vmovdqa64 %zmm2, %zmm0 733; AVX512VBMI-NEXT: retq 734 %r = shufflevector <64 x i8> %a0, <64 x i8> %a1, <64 x i32> <i32 1, i32 5, i32 7, i32 11, i32 13, i32 17, i32 19, i32 23, i32 25, i32 29, i32 31, i32 35, i32 37, i32 41, i32 43, i32 47, i32 49, i32 53, i32 55, i32 59, i32 61, i32 65, i32 67, i32 71, i32 73, i32 77, i32 79, i32 83, i32 85, i32 89, i32 91, i32 95, i32 97, i32 101, i32 103, i32 107, i32 109, i32 113, i32 115, i32 119, i32 121, i32 125, i32 127, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 735 ret <64 x i8> %r 736} 737 738define <64 x i8> @shuffle_v64i8_02_04_08_10_14_16_20_22_26_28_32_34_38_40_44_46_50_52_56_58_62_64_68_70_74_76_80_82_86_88_92_94_98_100_104_106_110_112_116_118_122_124_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u(<64 x i8> %a0, <64 x i8> %a1) { 739; AVX512F-LABEL: shuffle_v64i8_02_04_08_10_14_16_20_22_26_28_32_34_38_40_44_46_50_52_56_58_62_64_68_70_74_76_80_82_86_88_92_94_98_100_104_106_110_112_116_118_122_124_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u: 740; AVX512F: # %bb.0: 741; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2 742; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm3 743; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = [128,128,128,128,128,0,4,6,10,12,u,u,u,u,u,u] 744; AVX512F-NEXT: vpshufb %xmm4, %xmm3, %xmm3 745; AVX512F-NEXT: vmovdqa {{.*#+}} xmm5 = [2,4,8,10,14,128,128,128,128,128,u,u,u,u,u,u] 746; AVX512F-NEXT: vpshufb %xmm5, %xmm2, %xmm2 747; AVX512F-NEXT: vpor %xmm3, %xmm2, %xmm2 748; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm3 749; AVX512F-NEXT: vpshufb %xmm4, %xmm3, %xmm3 750; AVX512F-NEXT: vpshufb %xmm5, %xmm0, %xmm4 751; AVX512F-NEXT: vpor %xmm3, %xmm4, %xmm3 752; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 753; AVX512F-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,0,2,6,8,12,14,18,20,24,26,30,u,u,u,u,u,u,u,u,u,u,u] 754; AVX512F-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3,4],xmm0[5,6,7] 755; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm3[0,1,2,3],ymm0[4,5,6,7] 756; AVX512F-NEXT: vpshufb {{.*#+}} xmm3 = xmm1[u,u,u,u,u,0,4,6,10,12],zero,zero,zero,zero,zero,zero 757; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm1 758; AVX512F-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u],zero,zero,zero,zero,zero,xmm1[0,2,6,8,12,14] 759; AVX512F-NEXT: vpor %xmm3, %xmm1, %xmm1 760; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 761; AVX512F-NEXT: vpmovsxwd {{.*#+}} ymm3 = [4294967295,4294967295,4294967295,4294967295,4294967295,255,0,0] 762; AVX512F-NEXT: vpblendvb %ymm3, %ymm0, %ymm1, %ymm0 763; AVX512F-NEXT: vinserti32x4 $2, %xmm2, %zmm0, %zmm0 764; AVX512F-NEXT: retq 765; 766; AVX512BW-LABEL: shuffle_v64i8_02_04_08_10_14_16_20_22_26_28_32_34_38_40_44_46_50_52_56_58_62_64_68_70_74_76_80_82_86_88_92_94_98_100_104_106_110_112_116_118_122_124_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u: 767; AVX512BW: # %bb.0: 768; AVX512BW-NEXT: vextracti64x4 $1, %zmm1, %ymm2 769; AVX512BW-NEXT: vextracti128 $1, %ymm2, %xmm3 770; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm4 = [128,128,128,128,128,0,4,6,10,12,u,u,u,u,u,u] 771; AVX512BW-NEXT: vpshufb %xmm4, %xmm3, %xmm3 772; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm5 = [2,4,8,10,14,128,128,128,128,128,u,u,u,u,u,u] 773; AVX512BW-NEXT: vpshufb %xmm5, %xmm2, %xmm2 774; AVX512BW-NEXT: vpor %xmm3, %xmm2, %xmm2 775; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm3 776; AVX512BW-NEXT: vpshufb %xmm4, %xmm3, %xmm3 777; AVX512BW-NEXT: vpshufb %xmm5, %xmm0, %xmm4 778; AVX512BW-NEXT: vpor %xmm3, %xmm4, %xmm3 779; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm0 780; AVX512BW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,0,2,6,8,12,14,18,20,24,26,30,u,u,u,u,u,u,u,u,u,u,u] 781; AVX512BW-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3,4],xmm0[5,6,7] 782; AVX512BW-NEXT: vpblendd {{.*#+}} ymm0 = ymm3[0,1,2,3],ymm0[4,5,6,7] 783; AVX512BW-NEXT: vpshufb {{.*#+}} xmm3 = xmm1[u,u,u,u,u,0,4,6,10,12],zero,zero,zero,zero,zero,zero 784; AVX512BW-NEXT: vextracti128 $1, %ymm1, %xmm1 785; AVX512BW-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u],zero,zero,zero,zero,zero,xmm1[0,2,6,8,12,14] 786; AVX512BW-NEXT: vpor %xmm3, %xmm1, %xmm1 787; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 788; AVX512BW-NEXT: vpmovsxwd {{.*#+}} ymm3 = [4294967295,4294967295,4294967295,4294967295,4294967295,255,0,0] 789; AVX512BW-NEXT: vpblendvb %ymm3, %ymm0, %ymm1, %ymm0 790; AVX512BW-NEXT: vinserti32x4 $2, %xmm2, %zmm0, %zmm0 791; AVX512BW-NEXT: retq 792; 793; AVX512DQ-LABEL: shuffle_v64i8_02_04_08_10_14_16_20_22_26_28_32_34_38_40_44_46_50_52_56_58_62_64_68_70_74_76_80_82_86_88_92_94_98_100_104_106_110_112_116_118_122_124_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u: 794; AVX512DQ: # %bb.0: 795; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm2 796; AVX512DQ-NEXT: vextracti128 $1, %ymm2, %xmm3 797; AVX512DQ-NEXT: vmovdqa {{.*#+}} xmm4 = [128,128,128,128,128,0,4,6,10,12,u,u,u,u,u,u] 798; AVX512DQ-NEXT: vpshufb %xmm4, %xmm3, %xmm3 799; AVX512DQ-NEXT: vmovdqa {{.*#+}} xmm5 = [2,4,8,10,14,128,128,128,128,128,u,u,u,u,u,u] 800; AVX512DQ-NEXT: vpshufb %xmm5, %xmm2, %xmm2 801; AVX512DQ-NEXT: vpor %xmm3, %xmm2, %xmm2 802; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm3 803; AVX512DQ-NEXT: vpshufb %xmm4, %xmm3, %xmm3 804; AVX512DQ-NEXT: vpshufb %xmm5, %xmm0, %xmm4 805; AVX512DQ-NEXT: vpor %xmm3, %xmm4, %xmm3 806; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm0 807; AVX512DQ-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,0,2,6,8,12,14,18,20,24,26,30,u,u,u,u,u,u,u,u,u,u,u] 808; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3,4],xmm0[5,6,7] 809; AVX512DQ-NEXT: vpblendd {{.*#+}} ymm0 = ymm3[0,1,2,3],ymm0[4,5,6,7] 810; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm3 = xmm1[u,u,u,u,u,0,4,6,10,12],zero,zero,zero,zero,zero,zero 811; AVX512DQ-NEXT: vextracti128 $1, %ymm1, %xmm1 812; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u],zero,zero,zero,zero,zero,xmm1[0,2,6,8,12,14] 813; AVX512DQ-NEXT: vpor %xmm3, %xmm1, %xmm1 814; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 815; AVX512DQ-NEXT: vpmovsxwd {{.*#+}} ymm3 = [4294967295,4294967295,4294967295,4294967295,4294967295,255,0,0] 816; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm0, %ymm1, %ymm0 817; AVX512DQ-NEXT: vinserti32x4 $2, %xmm2, %zmm0, %zmm0 818; AVX512DQ-NEXT: retq 819; 820; AVX512VBMI-LABEL: shuffle_v64i8_02_04_08_10_14_16_20_22_26_28_32_34_38_40_44_46_50_52_56_58_62_64_68_70_74_76_80_82_86_88_92_94_98_100_104_106_110_112_116_118_122_124_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u: 821; AVX512VBMI: # %bb.0: 822; AVX512VBMI-NEXT: vmovdqa64 {{.*#+}} zmm2 = [2,4,8,10,14,16,20,22,26,28,32,34,38,40,44,46,50,52,56,58,62,64,68,70,74,76,80,82,86,88,92,94,98,100,104,106,110,112,116,118,122,124,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u] 823; AVX512VBMI-NEXT: vpermt2b %zmm1, %zmm2, %zmm0 824; AVX512VBMI-NEXT: retq 825 %r = shufflevector <64 x i8> %a0, <64 x i8> %a1, <64 x i32> <i32 2, i32 4, i32 8, i32 10, i32 14, i32 16, i32 20, i32 22, i32 26, i32 28, i32 32, i32 34, i32 38, i32 40, i32 44, i32 46, i32 50, i32 52, i32 56, i32 58, i32 62, i32 64, i32 68, i32 70, i32 74, i32 76, i32 80, i32 82, i32 86, i32 88, i32 92, i32 94, i32 98, i32 100, i32 104, i32 106, i32 110, i32 112, i32 116, i32 118, i32 122, i32 124, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 826 ret <64 x i8> %r 827} 828 829define <64 x i8> @shuffle_v64i8_00_04_06_10_12_16_18_22_24_28_30_34_36_40_42_46_48_52_54_58_60_64_66_70_72_76_78_82_84_88_90_94_96_100_102_106_108_112_114_118_120_124_126_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u(<64 x i8> %a0, <64 x i8> %a1) { 830; AVX512F-LABEL: shuffle_v64i8_00_04_06_10_12_16_18_22_24_28_30_34_36_40_42_46_48_52_54_58_60_64_66_70_72_76_78_82_84_88_90_94_96_100_102_106_108_112_114_118_120_124_126_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u: 831; AVX512F: # %bb.0: 832; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2 833; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [0,4,6,10,12,128,128,128,128,128,128,u,u,u,u,u] 834; AVX512F-NEXT: vpshufb %xmm3, %xmm2, %xmm4 835; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm2 836; AVX512F-NEXT: vmovdqa {{.*#+}} xmm5 = [128,128,128,128,128,0,2,6,8,12,14,u,u,u,u,u] 837; AVX512F-NEXT: vpshufb %xmm5, %xmm2, %xmm2 838; AVX512F-NEXT: vpor %xmm4, %xmm2, %xmm2 839; AVX512F-NEXT: vpshufb %xmm3, %xmm0, %xmm3 840; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm4 841; AVX512F-NEXT: vpshufb %xmm5, %xmm4, %xmm4 842; AVX512F-NEXT: vpor %xmm3, %xmm4, %xmm3 843; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 844; AVX512F-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,2,4,8,10,14,16,20,22,26,28,u,u,u,u,u,u,u,u,u,u,u] 845; AVX512F-NEXT: vpmovsxdq {{.*#+}} ymm4 = [18446744073709551615,16777215,0,0] 846; AVX512F-NEXT: vpblendvb %ymm4, %ymm3, %ymm0, %ymm0 847; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3 848; AVX512F-NEXT: vpshufb {{.*#+}} xmm3 = xmm3[u,u,u,u,u],zero,zero,zero,zero,zero,zero,xmm3[2,4,8,10,14] 849; AVX512F-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,0,2,6,8,12,14],zero,zero,zero,zero,zero 850; AVX512F-NEXT: vpor %xmm3, %xmm1, %xmm1 851; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 852; AVX512F-NEXT: vpmovsxwd {{.*#+}} ymm3 = [4294967295,4294967295,4294967295,4294967295,4294967295,255,0,0] 853; AVX512F-NEXT: vpblendvb %ymm3, %ymm0, %ymm1, %ymm0 854; AVX512F-NEXT: vinserti32x4 $2, %xmm2, %zmm0, %zmm0 855; AVX512F-NEXT: retq 856; 857; AVX512BW-LABEL: shuffle_v64i8_00_04_06_10_12_16_18_22_24_28_30_34_36_40_42_46_48_52_54_58_60_64_66_70_72_76_78_82_84_88_90_94_96_100_102_106_108_112_114_118_120_124_126_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u: 858; AVX512BW: # %bb.0: 859; AVX512BW-NEXT: vextracti64x4 $1, %zmm1, %ymm2 860; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [0,4,6,10,12,128,128,128,128,128,128,u,u,u,u,u] 861; AVX512BW-NEXT: vpshufb %xmm3, %xmm2, %xmm4 862; AVX512BW-NEXT: vextracti128 $1, %ymm2, %xmm2 863; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm5 = [128,128,128,128,128,0,2,6,8,12,14,u,u,u,u,u] 864; AVX512BW-NEXT: vpshufb %xmm5, %xmm2, %xmm2 865; AVX512BW-NEXT: vpor %xmm4, %xmm2, %xmm2 866; AVX512BW-NEXT: vpshufb %xmm3, %xmm0, %xmm3 867; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm4 868; AVX512BW-NEXT: vpshufb %xmm5, %xmm4, %xmm4 869; AVX512BW-NEXT: vpor %xmm3, %xmm4, %xmm3 870; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm0 871; AVX512BW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,2,4,8,10,14,16,20,22,26,28,u,u,u,u,u,u,u,u,u,u,u] 872; AVX512BW-NEXT: vpmovsxdq {{.*#+}} ymm4 = [18446744073709551615,16777215,0,0] 873; AVX512BW-NEXT: vpblendvb %ymm4, %ymm3, %ymm0, %ymm0 874; AVX512BW-NEXT: vextracti128 $1, %ymm1, %xmm3 875; AVX512BW-NEXT: vpshufb {{.*#+}} xmm3 = xmm3[u,u,u,u,u],zero,zero,zero,zero,zero,zero,xmm3[2,4,8,10,14] 876; AVX512BW-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,0,2,6,8,12,14],zero,zero,zero,zero,zero 877; AVX512BW-NEXT: vpor %xmm3, %xmm1, %xmm1 878; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 879; AVX512BW-NEXT: vpmovsxwd {{.*#+}} ymm3 = [4294967295,4294967295,4294967295,4294967295,4294967295,255,0,0] 880; AVX512BW-NEXT: vpblendvb %ymm3, %ymm0, %ymm1, %ymm0 881; AVX512BW-NEXT: vinserti32x4 $2, %xmm2, %zmm0, %zmm0 882; AVX512BW-NEXT: retq 883; 884; AVX512DQ-LABEL: shuffle_v64i8_00_04_06_10_12_16_18_22_24_28_30_34_36_40_42_46_48_52_54_58_60_64_66_70_72_76_78_82_84_88_90_94_96_100_102_106_108_112_114_118_120_124_126_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u: 885; AVX512DQ: # %bb.0: 886; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm2 887; AVX512DQ-NEXT: vmovdqa {{.*#+}} xmm3 = [0,4,6,10,12,128,128,128,128,128,128,u,u,u,u,u] 888; AVX512DQ-NEXT: vpshufb %xmm3, %xmm2, %xmm4 889; AVX512DQ-NEXT: vextracti128 $1, %ymm2, %xmm2 890; AVX512DQ-NEXT: vmovdqa {{.*#+}} xmm5 = [128,128,128,128,128,0,2,6,8,12,14,u,u,u,u,u] 891; AVX512DQ-NEXT: vpshufb %xmm5, %xmm2, %xmm2 892; AVX512DQ-NEXT: vpor %xmm4, %xmm2, %xmm2 893; AVX512DQ-NEXT: vpshufb %xmm3, %xmm0, %xmm3 894; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm4 895; AVX512DQ-NEXT: vpshufb %xmm5, %xmm4, %xmm4 896; AVX512DQ-NEXT: vpor %xmm3, %xmm4, %xmm3 897; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm0 898; AVX512DQ-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,2,4,8,10,14,16,20,22,26,28,u,u,u,u,u,u,u,u,u,u,u] 899; AVX512DQ-NEXT: vpmovsxdq {{.*#+}} ymm4 = [18446744073709551615,16777215,0,0] 900; AVX512DQ-NEXT: vpblendvb %ymm4, %ymm3, %ymm0, %ymm0 901; AVX512DQ-NEXT: vextracti128 $1, %ymm1, %xmm3 902; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm3 = xmm3[u,u,u,u,u],zero,zero,zero,zero,zero,zero,xmm3[2,4,8,10,14] 903; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,0,2,6,8,12,14],zero,zero,zero,zero,zero 904; AVX512DQ-NEXT: vpor %xmm3, %xmm1, %xmm1 905; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 906; AVX512DQ-NEXT: vpmovsxwd {{.*#+}} ymm3 = [4294967295,4294967295,4294967295,4294967295,4294967295,255,0,0] 907; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm0, %ymm1, %ymm0 908; AVX512DQ-NEXT: vinserti32x4 $2, %xmm2, %zmm0, %zmm0 909; AVX512DQ-NEXT: retq 910; 911; AVX512VBMI-LABEL: shuffle_v64i8_00_04_06_10_12_16_18_22_24_28_30_34_36_40_42_46_48_52_54_58_60_64_66_70_72_76_78_82_84_88_90_94_96_100_102_106_108_112_114_118_120_124_126_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u: 912; AVX512VBMI: # %bb.0: 913; AVX512VBMI-NEXT: vmovdqa64 {{.*#+}} zmm2 = [64,68,70,74,76,80,82,86,88,92,94,98,100,104,106,110,112,116,118,122,124,0,2,6,8,12,14,18,20,24,26,30,32,36,38,42,44,48,50,54,56,60,62,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u] 914; AVX512VBMI-NEXT: vpermi2b %zmm0, %zmm1, %zmm2 915; AVX512VBMI-NEXT: vmovdqa64 %zmm2, %zmm0 916; AVX512VBMI-NEXT: retq 917 %r = shufflevector <64 x i8> %a0, <64 x i8> %a1, <64 x i32> <i32 0, i32 4, i32 6, i32 10, i32 12, i32 16, i32 18, i32 22, i32 24, i32 28, i32 30, i32 34, i32 36, i32 40, i32 42, i32 46, i32 48, i32 52, i32 54, i32 58, i32 60, i32 64, i32 66, i32 70, i32 72, i32 76, i32 78, i32 82, i32 84, i32 88, i32 90, i32 94, i32 96, i32 100, i32 102, i32 106, i32 108, i32 112, i32 114, i32 118, i32 120, i32 124, i32 126, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 918 ret <64 x i8> %r 919} 920 921define <64 x i8> @shuffle_v64i8_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_32_33_34_35_36_37_38_39_40_41_42_65_69_71_75_77_81_83_87_89_93_95_99_101_105_107_111_113_117_119_123_125(<64 x i8> %a0, <64 x i8> %a1) { 922; AVX512F-LABEL: shuffle_v64i8_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_32_33_34_35_36_37_38_39_40_41_42_65_69_71_75_77_81_83_87_89_93_95_99_101_105_107_111_113_117_119_123_125: 923; AVX512F: # %bb.0: 924; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2 925; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm3 926; AVX512F-NEXT: vpshufb {{.*#+}} xmm3 = xmm3[u,u,u,u,u,u],zero,zero,zero,zero,zero,xmm3[1,5,7,11,13] 927; AVX512F-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,u,u,3,5,9,11,15],zero,zero,zero,zero,zero 928; AVX512F-NEXT: vpor %xmm3, %xmm2, %xmm2 929; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 930; AVX512F-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,u,1,5,7,11,13,17,19,23,25,29,31,u,u,u,u,u,u,u,u,u,u] 931; AVX512F-NEXT: vpblendw {{.*#+}} ymm2 = ymm1[0,1,2],ymm2[3,4,5,6,7],ymm1[8,9,10],ymm2[11,12,13,14,15] 932; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7] 933; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2 934; AVX512F-NEXT: vpmovsxdq {{.*#+}} ymm3 = [0,18446744073692774400,18446744073709551615,18446744073709551615] 935; AVX512F-NEXT: vpblendvb %ymm3, %ymm1, %ymm2, %ymm1 936; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 937; AVX512F-NEXT: retq 938; 939; AVX512BW-LABEL: shuffle_v64i8_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_32_33_34_35_36_37_38_39_40_41_42_65_69_71_75_77_81_83_87_89_93_95_99_101_105_107_111_113_117_119_123_125: 940; AVX512BW: # %bb.0: 941; AVX512BW-NEXT: vextracti64x4 $1, %zmm1, %ymm2 942; AVX512BW-NEXT: vextracti128 $1, %ymm2, %xmm3 943; AVX512BW-NEXT: vpshufb {{.*#+}} xmm3 = xmm3[u,u,u,u,u,u],zero,zero,zero,zero,zero,xmm3[1,5,7,11,13] 944; AVX512BW-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,u,u,3,5,9,11,15],zero,zero,zero,zero,zero 945; AVX512BW-NEXT: vpor %xmm3, %xmm2, %xmm2 946; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 947; AVX512BW-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,u,1,5,7,11,13,17,19,23,25,29,31,u,u,u,u,u,u,u,u,u,u] 948; AVX512BW-NEXT: vpblendw {{.*#+}} ymm2 = ymm1[0,1,2],ymm2[3,4,5,6,7],ymm1[8,9,10],ymm2[11,12,13,14,15] 949; AVX512BW-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7] 950; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm2 951; AVX512BW-NEXT: vpmovsxdq {{.*#+}} ymm3 = [0,18446744073692774400,18446744073709551615,18446744073709551615] 952; AVX512BW-NEXT: vpblendvb %ymm3, %ymm1, %ymm2, %ymm1 953; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 954; AVX512BW-NEXT: retq 955; 956; AVX512DQ-LABEL: shuffle_v64i8_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_32_33_34_35_36_37_38_39_40_41_42_65_69_71_75_77_81_83_87_89_93_95_99_101_105_107_111_113_117_119_123_125: 957; AVX512DQ: # %bb.0: 958; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm2 959; AVX512DQ-NEXT: vextracti128 $1, %ymm2, %xmm3 960; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm3 = xmm3[u,u,u,u,u,u],zero,zero,zero,zero,zero,xmm3[1,5,7,11,13] 961; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,u,u,3,5,9,11,15],zero,zero,zero,zero,zero 962; AVX512DQ-NEXT: vpor %xmm3, %xmm2, %xmm2 963; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 964; AVX512DQ-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,u,1,5,7,11,13,17,19,23,25,29,31,u,u,u,u,u,u,u,u,u,u] 965; AVX512DQ-NEXT: vpblendw {{.*#+}} ymm2 = ymm1[0,1,2],ymm2[3,4,5,6,7],ymm1[8,9,10],ymm2[11,12,13,14,15] 966; AVX512DQ-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7] 967; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2 968; AVX512DQ-NEXT: vpmovsxdq {{.*#+}} ymm3 = [0,18446744073692774400,18446744073709551615,18446744073709551615] 969; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm1, %ymm2, %ymm1 970; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 971; AVX512DQ-NEXT: retq 972; 973; AVX512VBMI-LABEL: shuffle_v64i8_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_32_33_34_35_36_37_38_39_40_41_42_65_69_71_75_77_81_83_87_89_93_95_99_101_105_107_111_113_117_119_123_125: 974; AVX512VBMI: # %bb.0: 975; AVX512VBMI-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,65,69,71,75,77,81,83,87,89,93,95,99,101,105,107,111,113,117,119,123,125] 976; AVX512VBMI-NEXT: vpermt2b %zmm1, %zmm2, %zmm0 977; AVX512VBMI-NEXT: retq 978 %r = shufflevector <64 x i8> %a0, <64 x i8> %a1, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 65, i32 69, i32 71, i32 75, i32 77, i32 81, i32 83, i32 87, i32 89, i32 93, i32 95, i32 99, i32 101, i32 105, i32 107, i32 111, i32 113, i32 117, i32 119, i32 123, i32 125> 979 ret <64 x i8> %r 980} 981 982define <64 x i8> @shuffle_v64i8_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_32_33_34_35_36_37_38_39_40_41_42_67_69_73_75_79_81_85_87_91_93_97_99_103_105_109_111_115_117_121_123_127(<64 x i8> %a0, <64 x i8> %a1) { 983; AVX512F-LABEL: shuffle_v64i8_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_32_33_34_35_36_37_38_39_40_41_42_67_69_73_75_79_81_85_87_91_93_97_99_103_105_109_111_115_117_121_123_127: 984; AVX512F: # %bb.0: 985; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2 986; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm3 987; AVX512F-NEXT: vpshufb {{.*#+}} xmm3 = xmm3[u,u,u,u,u],zero,zero,zero,zero,zero,zero,xmm3[3,5,9,11,15] 988; AVX512F-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,u,1,3,7,9,13,15],zero,zero,zero,zero,zero 989; AVX512F-NEXT: vpor %xmm3, %xmm2, %xmm2 990; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 991; AVX512F-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,u,3,5,9,11,15,17,21,23,27,29,u,u,u,u,u,u,u,u,u,u,u] 992; AVX512F-NEXT: vpmovsxwd {{.*#+}} ymm3 = [0,0,0,0,0,4294967040,4294967295,4294967295] 993; AVX512F-NEXT: vpblendvb %ymm3, %ymm2, %ymm1, %ymm1 994; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2 995; AVX512F-NEXT: vpmovsxdq {{.*#+}} ymm3 = [0,18446744073692774400,18446744073709551615,18446744073709551615] 996; AVX512F-NEXT: vpblendvb %ymm3, %ymm1, %ymm2, %ymm1 997; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 998; AVX512F-NEXT: retq 999; 1000; AVX512BW-LABEL: shuffle_v64i8_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_32_33_34_35_36_37_38_39_40_41_42_67_69_73_75_79_81_85_87_91_93_97_99_103_105_109_111_115_117_121_123_127: 1001; AVX512BW: # %bb.0: 1002; AVX512BW-NEXT: vextracti64x4 $1, %zmm1, %ymm2 1003; AVX512BW-NEXT: vextracti128 $1, %ymm2, %xmm3 1004; AVX512BW-NEXT: vpshufb {{.*#+}} xmm3 = xmm3[u,u,u,u,u],zero,zero,zero,zero,zero,zero,xmm3[3,5,9,11,15] 1005; AVX512BW-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,u,1,3,7,9,13,15],zero,zero,zero,zero,zero 1006; AVX512BW-NEXT: vpor %xmm3, %xmm2, %xmm2 1007; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 1008; AVX512BW-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,u,3,5,9,11,15,17,21,23,27,29,u,u,u,u,u,u,u,u,u,u,u] 1009; AVX512BW-NEXT: vpmovsxwd {{.*#+}} ymm3 = [0,0,0,0,0,4294967040,4294967295,4294967295] 1010; AVX512BW-NEXT: vpblendvb %ymm3, %ymm2, %ymm1, %ymm1 1011; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm2 1012; AVX512BW-NEXT: vpmovsxdq {{.*#+}} ymm3 = [0,18446744073692774400,18446744073709551615,18446744073709551615] 1013; AVX512BW-NEXT: vpblendvb %ymm3, %ymm1, %ymm2, %ymm1 1014; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 1015; AVX512BW-NEXT: retq 1016; 1017; AVX512DQ-LABEL: shuffle_v64i8_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_32_33_34_35_36_37_38_39_40_41_42_67_69_73_75_79_81_85_87_91_93_97_99_103_105_109_111_115_117_121_123_127: 1018; AVX512DQ: # %bb.0: 1019; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm2 1020; AVX512DQ-NEXT: vextracti128 $1, %ymm2, %xmm3 1021; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm3 = xmm3[u,u,u,u,u],zero,zero,zero,zero,zero,zero,xmm3[3,5,9,11,15] 1022; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,u,1,3,7,9,13,15],zero,zero,zero,zero,zero 1023; AVX512DQ-NEXT: vpor %xmm3, %xmm2, %xmm2 1024; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 1025; AVX512DQ-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,u,3,5,9,11,15,17,21,23,27,29,u,u,u,u,u,u,u,u,u,u,u] 1026; AVX512DQ-NEXT: vpmovsxwd {{.*#+}} ymm3 = [0,0,0,0,0,4294967040,4294967295,4294967295] 1027; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm2, %ymm1, %ymm1 1028; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2 1029; AVX512DQ-NEXT: vpmovsxdq {{.*#+}} ymm3 = [0,18446744073692774400,18446744073709551615,18446744073709551615] 1030; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm1, %ymm2, %ymm1 1031; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 1032; AVX512DQ-NEXT: retq 1033; 1034; AVX512VBMI-LABEL: shuffle_v64i8_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_32_33_34_35_36_37_38_39_40_41_42_67_69_73_75_79_81_85_87_91_93_97_99_103_105_109_111_115_117_121_123_127: 1035; AVX512VBMI: # %bb.0: 1036; AVX512VBMI-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,67,69,73,75,79,81,85,87,91,93,97,99,103,105,109,111,115,117,121,123,127] 1037; AVX512VBMI-NEXT: vpermt2b %zmm1, %zmm2, %zmm0 1038; AVX512VBMI-NEXT: retq 1039 %r = shufflevector <64 x i8> %a0, <64 x i8> %a1, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 67, i32 69, i32 73, i32 75, i32 79, i32 81, i32 85, i32 87, i32 91, i32 93, i32 97, i32 99, i32 103, i32 105, i32 109, i32 111, i32 115, i32 117, i32 121, i32 123, i32 127> 1040 ret <64 x i8> %r 1041} 1042 1043define <64 x i8> @shuffle_v64i8_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_32_33_34_35_36_37_38_39_40_41_64_66_70_72_76_78_82_84_88_90_94_96_100_102_106_108_112_114_118_120_124_126(<64 x i8> %a0, <64 x i8> %a1) { 1044; AVX512F-LABEL: shuffle_v64i8_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_32_33_34_35_36_37_38_39_40_41_64_66_70_72_76_78_82_84_88_90_94_96_100_102_106_108_112_114_118_120_124_126: 1045; AVX512F: # %bb.0: 1046; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2 1047; AVX512F-NEXT: vpshufb {{.*#+}} xmm3 = xmm2[u,u,u,u,u,0,4,6,10,12],zero,zero,zero,zero,zero,zero 1048; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm2 1049; AVX512F-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,u],zero,zero,zero,zero,zero,xmm2[0,2,6,8,12,14] 1050; AVX512F-NEXT: vpor %xmm3, %xmm2, %xmm2 1051; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 1052; AVX512F-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,0,2,6,8,12,14,18,20,24,26,30,u,u,u,u,u,u,u,u,u,u,u] 1053; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [u,u,u,u,u,u,u,u,u,u,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0] 1054; AVX512F-NEXT: vpblendvb %ymm3, %ymm1, %ymm2, %ymm1 1055; AVX512F-NEXT: vextracti32x4 $2, %zmm0, %xmm2 1056; AVX512F-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4],xmm1[5,6,7] 1057; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] 1058; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 1059; AVX512F-NEXT: retq 1060; 1061; AVX512BW-LABEL: shuffle_v64i8_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_32_33_34_35_36_37_38_39_40_41_64_66_70_72_76_78_82_84_88_90_94_96_100_102_106_108_112_114_118_120_124_126: 1062; AVX512BW: # %bb.0: 1063; AVX512BW-NEXT: vextracti64x4 $1, %zmm1, %ymm2 1064; AVX512BW-NEXT: vpshufb {{.*#+}} xmm3 = xmm2[u,u,u,u,u,0,4,6,10,12],zero,zero,zero,zero,zero,zero 1065; AVX512BW-NEXT: vextracti128 $1, %ymm2, %xmm2 1066; AVX512BW-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,u],zero,zero,zero,zero,zero,xmm2[0,2,6,8,12,14] 1067; AVX512BW-NEXT: vpor %xmm3, %xmm2, %xmm2 1068; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 1069; AVX512BW-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,0,2,6,8,12,14,18,20,24,26,30,u,u,u,u,u,u,u,u,u,u,u] 1070; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm3 = [u,u,u,u,u,u,u,u,u,u,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0] 1071; AVX512BW-NEXT: vpblendvb %ymm3, %ymm1, %ymm2, %ymm1 1072; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm2 1073; AVX512BW-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4],xmm1[5,6,7] 1074; AVX512BW-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] 1075; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 1076; AVX512BW-NEXT: retq 1077; 1078; AVX512DQ-LABEL: shuffle_v64i8_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_32_33_34_35_36_37_38_39_40_41_64_66_70_72_76_78_82_84_88_90_94_96_100_102_106_108_112_114_118_120_124_126: 1079; AVX512DQ: # %bb.0: 1080; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm2 1081; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm3 = xmm2[u,u,u,u,u,0,4,6,10,12],zero,zero,zero,zero,zero,zero 1082; AVX512DQ-NEXT: vextracti128 $1, %ymm2, %xmm2 1083; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,u],zero,zero,zero,zero,zero,xmm2[0,2,6,8,12,14] 1084; AVX512DQ-NEXT: vpor %xmm3, %xmm2, %xmm2 1085; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 1086; AVX512DQ-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,0,2,6,8,12,14,18,20,24,26,30,u,u,u,u,u,u,u,u,u,u,u] 1087; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm3 = [u,u,u,u,u,u,u,u,u,u,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0] 1088; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm1, %ymm2, %ymm1 1089; AVX512DQ-NEXT: vextracti32x4 $2, %zmm0, %xmm2 1090; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4],xmm1[5,6,7] 1091; AVX512DQ-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] 1092; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 1093; AVX512DQ-NEXT: retq 1094; 1095; AVX512VBMI-LABEL: shuffle_v64i8_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_32_33_34_35_36_37_38_39_40_41_64_66_70_72_76_78_82_84_88_90_94_96_100_102_106_108_112_114_118_120_124_126: 1096; AVX512VBMI: # %bb.0: 1097; AVX512VBMI-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,64,66,70,72,76,78,82,84,88,90,94,96,100,102,106,108,112,114,118,120,124,126] 1098; AVX512VBMI-NEXT: vpermt2b %zmm1, %zmm2, %zmm0 1099; AVX512VBMI-NEXT: retq 1100 %r = shufflevector <64 x i8> %a0, <64 x i8> %a1, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 64, i32 66, i32 70, i32 72, i32 76, i32 78, i32 82, i32 84, i32 88, i32 90, i32 94, i32 96, i32 100, i32 102, i32 106, i32 108, i32 112, i32 114, i32 118, i32 120, i32 124, i32 126> 1101 ret <64 x i8> %r 1102} 1103 1104define <64 x i8> @shuffle_v64i8_61_62_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124(<64 x i8> %a0, <64 x i8> %a1) { 1105; AVX512F-LABEL: shuffle_v64i8_61_62_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124: 1106; AVX512F: # %bb.0: 1107; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2 1108; AVX512F-NEXT: vperm2i128 {{.*#+}} ymm3 = ymm0[2,3],ymm2[0,1] 1109; AVX512F-NEXT: vpalignr {{.*#+}} ymm2 = ymm3[13,14,15],ymm2[0,1,2,3,4,5,6,7,8,9,10,11,12],ymm3[29,30,31],ymm2[16,17,18,19,20,21,22,23,24,25,26,27,28] 1110; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1 1111; AVX512F-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm1[2,3],ymm0[0,1] 1112; AVX512F-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[13,14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],ymm1[29,30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] 1113; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 1114; AVX512F-NEXT: retq 1115; 1116; AVX512BW-LABEL: shuffle_v64i8_61_62_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124: 1117; AVX512BW: # %bb.0: 1118; AVX512BW-NEXT: valignq {{.*#+}} zmm1 = zmm1[6,7],zmm0[0,1,2,3,4,5] 1119; AVX512BW-NEXT: vpalignr {{.*#+}} zmm0 = zmm1[13,14,15],zmm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zmm1[29,30,31],zmm0[16,17,18,19,20,21,22,23,24,25,26,27,28],zmm1[45,46,47],zmm0[32,33,34,35,36,37,38,39,40,41,42,43,44],zmm1[61,62,63],zmm0[48,49,50,51,52,53,54,55,56,57,58,59,60] 1120; AVX512BW-NEXT: retq 1121; 1122; AVX512DQ-LABEL: shuffle_v64i8_61_62_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124: 1123; AVX512DQ: # %bb.0: 1124; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2 1125; AVX512DQ-NEXT: vperm2i128 {{.*#+}} ymm3 = ymm0[2,3],ymm2[0,1] 1126; AVX512DQ-NEXT: vpalignr {{.*#+}} ymm2 = ymm3[13,14,15],ymm2[0,1,2,3,4,5,6,7,8,9,10,11,12],ymm3[29,30,31],ymm2[16,17,18,19,20,21,22,23,24,25,26,27,28] 1127; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm1 1128; AVX512DQ-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm1[2,3],ymm0[0,1] 1129; AVX512DQ-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[13,14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],ymm1[29,30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] 1130; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 1131; AVX512DQ-NEXT: retq 1132; 1133; AVX512VBMI-LABEL: shuffle_v64i8_61_62_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124: 1134; AVX512VBMI: # %bb.0: 1135; AVX512VBMI-NEXT: valignq {{.*#+}} zmm1 = zmm1[6,7],zmm0[0,1,2,3,4,5] 1136; AVX512VBMI-NEXT: vpalignr {{.*#+}} zmm0 = zmm1[13,14,15],zmm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zmm1[29,30,31],zmm0[16,17,18,19,20,21,22,23,24,25,26,27,28],zmm1[45,46,47],zmm0[32,33,34,35,36,37,38,39,40,41,42,43,44],zmm1[61,62,63],zmm0[48,49,50,51,52,53,54,55,56,57,58,59,60] 1137; AVX512VBMI-NEXT: retq 1138 %r = shufflevector <64 x i8> %a1, <64 x i8> %a0, <64 x i32> <i32 61, i32 62, i32 63, i32 64, i32 65, i32 66, i32 67, i32 68, i32 69, i32 70, i32 71, i32 72, i32 73, i32 74, i32 75, i32 76, i32 77, i32 78, i32 79, i32 80, i32 81, i32 82, i32 83, i32 84, i32 85, i32 86, i32 87, i32 88, i32 89, i32 90, i32 91, i32 92, i32 93, i32 94, i32 95, i32 96, i32 97, i32 98, i32 99, i32 100, i32 101, i32 102, i32 103, i32 104, i32 105, i32 106, i32 107, i32 108, i32 109, i32 110, i32 111, i32 112, i32 113, i32 114, i32 115, i32 116, i32 117, i32 118, i32 119, i32 120, i32 121, i32 122, i32 123, i32 124> 1139 ret <64 x i8> %r 1140} 1141 1142; PR79799 1143define <64 x i8> @shuffle_v64i8_62_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124_125(<64 x i8> %a0, <64 x i8> %a1) { 1144; AVX512F-LABEL: shuffle_v64i8_62_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124_125: 1145; AVX512F: # %bb.0: 1146; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2 1147; AVX512F-NEXT: vperm2i128 {{.*#+}} ymm3 = ymm0[2,3],ymm2[0,1] 1148; AVX512F-NEXT: vpalignr {{.*#+}} ymm2 = ymm3[14,15],ymm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm3[30,31],ymm2[16,17,18,19,20,21,22,23,24,25,26,27,28,29] 1149; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1 1150; AVX512F-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm1[2,3],ymm0[0,1] 1151; AVX512F-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm1[30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29] 1152; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 1153; AVX512F-NEXT: retq 1154; 1155; AVX512BW-LABEL: shuffle_v64i8_62_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124_125: 1156; AVX512BW: # %bb.0: 1157; AVX512BW-NEXT: valignq {{.*#+}} zmm1 = zmm1[6,7],zmm0[0,1,2,3,4,5] 1158; AVX512BW-NEXT: vpalignr {{.*#+}} zmm0 = zmm1[14,15],zmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13],zmm1[30,31],zmm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29],zmm1[46,47],zmm0[32,33,34,35,36,37,38,39,40,41,42,43,44,45],zmm1[62,63],zmm0[48,49,50,51,52,53,54,55,56,57,58,59,60,61] 1159; AVX512BW-NEXT: retq 1160; 1161; AVX512DQ-LABEL: shuffle_v64i8_62_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124_125: 1162; AVX512DQ: # %bb.0: 1163; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2 1164; AVX512DQ-NEXT: vperm2i128 {{.*#+}} ymm3 = ymm0[2,3],ymm2[0,1] 1165; AVX512DQ-NEXT: vpalignr {{.*#+}} ymm2 = ymm3[14,15],ymm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm3[30,31],ymm2[16,17,18,19,20,21,22,23,24,25,26,27,28,29] 1166; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm1 1167; AVX512DQ-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm1[2,3],ymm0[0,1] 1168; AVX512DQ-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm1[30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29] 1169; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 1170; AVX512DQ-NEXT: retq 1171; 1172; AVX512VBMI-LABEL: shuffle_v64i8_62_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124_125: 1173; AVX512VBMI: # %bb.0: 1174; AVX512VBMI-NEXT: valignq {{.*#+}} zmm1 = zmm1[6,7],zmm0[0,1,2,3,4,5] 1175; AVX512VBMI-NEXT: vpalignr {{.*#+}} zmm0 = zmm1[14,15],zmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13],zmm1[30,31],zmm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29],zmm1[46,47],zmm0[32,33,34,35,36,37,38,39,40,41,42,43,44,45],zmm1[62,63],zmm0[48,49,50,51,52,53,54,55,56,57,58,59,60,61] 1176; AVX512VBMI-NEXT: retq 1177 %r = shufflevector <64 x i8> %a1, <64 x i8> %a0, <64 x i32> <i32 62, i32 63, i32 64, i32 65, i32 66, i32 67, i32 68, i32 69, i32 70, i32 71, i32 72, i32 73, i32 74, i32 75, i32 76, i32 77, i32 78, i32 79, i32 80, i32 81, i32 82, i32 83, i32 84, i32 85, i32 86, i32 87, i32 88, i32 89, i32 90, i32 91, i32 92, i32 93, i32 94, i32 95, i32 96, i32 97, i32 98, i32 99, i32 100, i32 101, i32 102, i32 103, i32 104, i32 105, i32 106, i32 107, i32 108, i32 109, i32 110, i32 111, i32 112, i32 113, i32 114, i32 115, i32 116, i32 117, i32 118, i32 119, i32 120, i32 121, i32 122, i32 123, i32 124, i32 125> 1178 ret <64 x i8> %r 1179} 1180 1181define <64 x i8> @shuffle_v64i8_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124_125_126(<64 x i8> %a0, <64 x i8> %a1) { 1182; AVX512F-LABEL: shuffle_v64i8_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124_125_126: 1183; AVX512F: # %bb.0: 1184; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2 1185; AVX512F-NEXT: vperm2i128 {{.*#+}} ymm3 = ymm0[2,3],ymm2[0,1] 1186; AVX512F-NEXT: vpalignr {{.*#+}} ymm2 = ymm3[15],ymm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm3[31],ymm2[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30] 1187; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1 1188; AVX512F-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm1[2,3],ymm0[0,1] 1189; AVX512F-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30] 1190; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 1191; AVX512F-NEXT: retq 1192; 1193; AVX512BW-LABEL: shuffle_v64i8_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124_125_126: 1194; AVX512BW: # %bb.0: 1195; AVX512BW-NEXT: valignq {{.*#+}} zmm1 = zmm1[6,7],zmm0[0,1,2,3,4,5] 1196; AVX512BW-NEXT: vpalignr {{.*#+}} zmm0 = zmm1[15],zmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zmm1[31],zmm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30],zmm1[47],zmm0[32,33,34,35,36,37,38,39,40,41,42,43,44,45,46],zmm1[63],zmm0[48,49,50,51,52,53,54,55,56,57,58,59,60,61,62] 1197; AVX512BW-NEXT: retq 1198; 1199; AVX512DQ-LABEL: shuffle_v64i8_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124_125_126: 1200; AVX512DQ: # %bb.0: 1201; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2 1202; AVX512DQ-NEXT: vperm2i128 {{.*#+}} ymm3 = ymm0[2,3],ymm2[0,1] 1203; AVX512DQ-NEXT: vpalignr {{.*#+}} ymm2 = ymm3[15],ymm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm3[31],ymm2[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30] 1204; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm1 1205; AVX512DQ-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm1[2,3],ymm0[0,1] 1206; AVX512DQ-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30] 1207; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 1208; AVX512DQ-NEXT: retq 1209; 1210; AVX512VBMI-LABEL: shuffle_v64i8_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124_125_126: 1211; AVX512VBMI: # %bb.0: 1212; AVX512VBMI-NEXT: valignq {{.*#+}} zmm1 = zmm1[6,7],zmm0[0,1,2,3,4,5] 1213; AVX512VBMI-NEXT: vpalignr {{.*#+}} zmm0 = zmm1[15],zmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zmm1[31],zmm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30],zmm1[47],zmm0[32,33,34,35,36,37,38,39,40,41,42,43,44,45,46],zmm1[63],zmm0[48,49,50,51,52,53,54,55,56,57,58,59,60,61,62] 1214; AVX512VBMI-NEXT: retq 1215 %r = shufflevector <64 x i8> %a1, <64 x i8> %a0, <64 x i32> <i32 63, i32 64, i32 65, i32 66, i32 67, i32 68, i32 69, i32 70, i32 71, i32 72, i32 73, i32 74, i32 75, i32 76, i32 77, i32 78, i32 79, i32 80, i32 81, i32 82, i32 83, i32 84, i32 85, i32 86, i32 87, i32 88, i32 89, i32 90, i32 91, i32 92, i32 93, i32 94, i32 95, i32 96, i32 97, i32 98, i32 99, i32 100, i32 101, i32 102, i32 103, i32 104, i32 105, i32 106, i32 107, i32 108, i32 109, i32 110, i32 111, i32 112, i32 113, i32 114, i32 115, i32 116, i32 117, i32 118, i32 119, i32 120, i32 121, i32 122, i32 123, i32 124, i32 125, i32 126> 1216 ret <64 x i8> %r 1217} 1218 1219define <64 x i8> @shuffle_v64i8_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_32_33_34_35_36_37_38_39_40_41_42_66_68_72_74_78_80_84_86_90_92_96_98_102_104_108_110_114_116_120_122_126(<64 x i8> %a0, <64 x i8> %a1) { 1220; AVX512F-LABEL: shuffle_v64i8_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_32_33_34_35_36_37_38_39_40_41_42_66_68_72_74_78_80_84_86_90_92_96_98_102_104_108_110_114_116_120_122_126: 1221; AVX512F: # %bb.0: 1222; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2 1223; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm3 1224; AVX512F-NEXT: vpshufb {{.*#+}} xmm3 = xmm3[u,u,u,u,u],zero,zero,zero,zero,zero,zero,xmm3[2,4,8,10,14] 1225; AVX512F-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,u,0,2,6,8,12,14],zero,zero,zero,zero,zero 1226; AVX512F-NEXT: vpor %xmm3, %xmm2, %xmm2 1227; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 1228; AVX512F-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,u,2,4,8,10,14,16,20,22,26,28,u,u,u,u,u,u,u,u,u,u,u] 1229; AVX512F-NEXT: vpmovsxwd {{.*#+}} ymm3 = [0,0,0,0,0,4294967040,4294967295,4294967295] 1230; AVX512F-NEXT: vpblendvb %ymm3, %ymm2, %ymm1, %ymm1 1231; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2 1232; AVX512F-NEXT: vpmovsxdq {{.*#+}} ymm3 = [0,18446744073692774400,18446744073709551615,18446744073709551615] 1233; AVX512F-NEXT: vpblendvb %ymm3, %ymm1, %ymm2, %ymm1 1234; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 1235; AVX512F-NEXT: retq 1236; 1237; AVX512BW-LABEL: shuffle_v64i8_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_32_33_34_35_36_37_38_39_40_41_42_66_68_72_74_78_80_84_86_90_92_96_98_102_104_108_110_114_116_120_122_126: 1238; AVX512BW: # %bb.0: 1239; AVX512BW-NEXT: vextracti64x4 $1, %zmm1, %ymm2 1240; AVX512BW-NEXT: vextracti128 $1, %ymm2, %xmm3 1241; AVX512BW-NEXT: vpshufb {{.*#+}} xmm3 = xmm3[u,u,u,u,u],zero,zero,zero,zero,zero,zero,xmm3[2,4,8,10,14] 1242; AVX512BW-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,u,0,2,6,8,12,14],zero,zero,zero,zero,zero 1243; AVX512BW-NEXT: vpor %xmm3, %xmm2, %xmm2 1244; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 1245; AVX512BW-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,u,2,4,8,10,14,16,20,22,26,28,u,u,u,u,u,u,u,u,u,u,u] 1246; AVX512BW-NEXT: vpmovsxwd {{.*#+}} ymm3 = [0,0,0,0,0,4294967040,4294967295,4294967295] 1247; AVX512BW-NEXT: vpblendvb %ymm3, %ymm2, %ymm1, %ymm1 1248; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm2 1249; AVX512BW-NEXT: vpmovsxdq {{.*#+}} ymm3 = [0,18446744073692774400,18446744073709551615,18446744073709551615] 1250; AVX512BW-NEXT: vpblendvb %ymm3, %ymm1, %ymm2, %ymm1 1251; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 1252; AVX512BW-NEXT: retq 1253; 1254; AVX512DQ-LABEL: shuffle_v64i8_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_32_33_34_35_36_37_38_39_40_41_42_66_68_72_74_78_80_84_86_90_92_96_98_102_104_108_110_114_116_120_122_126: 1255; AVX512DQ: # %bb.0: 1256; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm2 1257; AVX512DQ-NEXT: vextracti128 $1, %ymm2, %xmm3 1258; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm3 = xmm3[u,u,u,u,u],zero,zero,zero,zero,zero,zero,xmm3[2,4,8,10,14] 1259; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,u,0,2,6,8,12,14],zero,zero,zero,zero,zero 1260; AVX512DQ-NEXT: vpor %xmm3, %xmm2, %xmm2 1261; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 1262; AVX512DQ-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,u,2,4,8,10,14,16,20,22,26,28,u,u,u,u,u,u,u,u,u,u,u] 1263; AVX512DQ-NEXT: vpmovsxwd {{.*#+}} ymm3 = [0,0,0,0,0,4294967040,4294967295,4294967295] 1264; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm2, %ymm1, %ymm1 1265; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2 1266; AVX512DQ-NEXT: vpmovsxdq {{.*#+}} ymm3 = [0,18446744073692774400,18446744073709551615,18446744073709551615] 1267; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm1, %ymm2, %ymm1 1268; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 1269; AVX512DQ-NEXT: retq 1270; 1271; AVX512VBMI-LABEL: shuffle_v64i8_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_32_33_34_35_36_37_38_39_40_41_42_66_68_72_74_78_80_84_86_90_92_96_98_102_104_108_110_114_116_120_122_126: 1272; AVX512VBMI: # %bb.0: 1273; AVX512VBMI-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,66,68,72,74,78,80,84,86,90,92,96,98,102,104,108,110,114,116,120,122,126] 1274; AVX512VBMI-NEXT: vpermt2b %zmm1, %zmm2, %zmm0 1275; AVX512VBMI-NEXT: retq 1276 %r = shufflevector <64 x i8> %a0, <64 x i8> %a1, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 66, i32 68, i32 72, i32 74, i32 78, i32 80, i32 84, i32 86, i32 90, i32 92, i32 96, i32 98, i32 102, i32 104, i32 108, i32 110, i32 114, i32 116, i32 120, i32 122, i32 126> 1277 ret <64 x i8> %r 1278} 1279 1280define <64 x i8> @shuffle_v64i8_ashr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125(<16 x i32> %a0, <16 x i32> %a1) nounwind { 1281; AVX512F-LABEL: shuffle_v64i8_ashr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125: 1282; AVX512F: # %bb.0: 1283; AVX512F-NEXT: vpsrad $25, %zmm0, %zmm0 1284; AVX512F-NEXT: vpsrad $25, %zmm1, %zmm1 1285; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2 1286; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3 1287; AVX512F-NEXT: vpackssdw %ymm2, %ymm3, %ymm2 1288; AVX512F-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 1289; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 1290; AVX512F-NEXT: retq 1291; 1292; AVX512BW-LABEL: shuffle_v64i8_ashr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125: 1293; AVX512BW: # %bb.0: 1294; AVX512BW-NEXT: vpsrad $25, %zmm0, %zmm0 1295; AVX512BW-NEXT: vpsrad $25, %zmm1, %zmm1 1296; AVX512BW-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 1297; AVX512BW-NEXT: retq 1298; 1299; AVX512DQ-LABEL: shuffle_v64i8_ashr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125: 1300; AVX512DQ: # %bb.0: 1301; AVX512DQ-NEXT: vpsrad $25, %zmm0, %zmm0 1302; AVX512DQ-NEXT: vpsrad $25, %zmm1, %zmm1 1303; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm2 1304; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm3 1305; AVX512DQ-NEXT: vpackssdw %ymm2, %ymm3, %ymm2 1306; AVX512DQ-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 1307; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 1308; AVX512DQ-NEXT: retq 1309; 1310; AVX512VBMI-LABEL: shuffle_v64i8_ashr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125: 1311; AVX512VBMI: # %bb.0: 1312; AVX512VBMI-NEXT: vpsrad $25, %zmm0, %zmm0 1313; AVX512VBMI-NEXT: vpsrad $25, %zmm1, %zmm1 1314; AVX512VBMI-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 1315; AVX512VBMI-NEXT: retq 1316 %1 = ashr <16 x i32> %a0, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25> 1317 %2 = ashr <16 x i32> %a1, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25> 1318 %3 = bitcast <16 x i32> %1 to <64 x i8> 1319 %4 = bitcast <16 x i32> %2 to <64 x i8> 1320 %5 = shufflevector <64 x i8> %3, <64 x i8> %4, <64 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 12, i32 13, i32 64, i32 65, i32 68, i32 69, i32 72, i32 73, i32 76, i32 77, i32 16, i32 17, i32 20, i32 21, i32 24, i32 25, i32 28, i32 29, i32 80, i32 81, i32 84, i32 85, i32 88, i32 89, i32 92, i32 93, i32 32, i32 33, i32 36, i32 37, i32 40, i32 41, i32 44, i32 45, i32 96, i32 97, i32 100, i32 101, i32 104, i32 105, i32 108, i32 109, i32 48, i32 49, i32 52, i32 53, i32 56, i32 57, i32 60, i32 61, i32 112, i32 113, i32 116, i32 117, i32 120, i32 121, i32 124, i32 125> 1321 ret <64 x i8> %5 1322} 1323 1324define <64 x i8> @shuffle_v64i8_ashr_00_04_08_12_64_68_72_76_00_04_08_12_64_68_72_76_16_20_24_28_80_84_88_92_16_20_24_28_80_84_88_92_32_36_40_44_96_100_104_108_32_36_40_44_96_100_104_108_48_52_56_60_112_116_120_124_48_52_56_60_112_116_120_124(<16 x i32> %a0, <16 x i32> %a1) nounwind { 1325; AVX512F-LABEL: shuffle_v64i8_ashr_00_04_08_12_64_68_72_76_00_04_08_12_64_68_72_76_16_20_24_28_80_84_88_92_16_20_24_28_80_84_88_92_32_36_40_44_96_100_104_108_32_36_40_44_96_100_104_108_48_52_56_60_112_116_120_124_48_52_56_60_112_116_120_124: 1326; AVX512F: # %bb.0: 1327; AVX512F-NEXT: vpsrad $25, %zmm0, %zmm0 1328; AVX512F-NEXT: vpsrad $25, %zmm1, %zmm1 1329; AVX512F-NEXT: vpackssdw %ymm1, %ymm0, %ymm2 1330; AVX512F-NEXT: vpacksswb %ymm2, %ymm2, %ymm2 1331; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1 1332; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 1333; AVX512F-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 1334; AVX512F-NEXT: vpacksswb %ymm0, %ymm0, %ymm0 1335; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0 1336; AVX512F-NEXT: retq 1337; 1338; AVX512BW-LABEL: shuffle_v64i8_ashr_00_04_08_12_64_68_72_76_00_04_08_12_64_68_72_76_16_20_24_28_80_84_88_92_16_20_24_28_80_84_88_92_32_36_40_44_96_100_104_108_32_36_40_44_96_100_104_108_48_52_56_60_112_116_120_124_48_52_56_60_112_116_120_124: 1339; AVX512BW: # %bb.0: 1340; AVX512BW-NEXT: vpsrad $25, %zmm0, %zmm0 1341; AVX512BW-NEXT: vpsrad $25, %zmm1, %zmm1 1342; AVX512BW-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 1343; AVX512BW-NEXT: vpacksswb %zmm0, %zmm0, %zmm0 1344; AVX512BW-NEXT: retq 1345; 1346; AVX512DQ-LABEL: shuffle_v64i8_ashr_00_04_08_12_64_68_72_76_00_04_08_12_64_68_72_76_16_20_24_28_80_84_88_92_16_20_24_28_80_84_88_92_32_36_40_44_96_100_104_108_32_36_40_44_96_100_104_108_48_52_56_60_112_116_120_124_48_52_56_60_112_116_120_124: 1347; AVX512DQ: # %bb.0: 1348; AVX512DQ-NEXT: vpsrad $25, %zmm0, %zmm0 1349; AVX512DQ-NEXT: vpsrad $25, %zmm1, %zmm1 1350; AVX512DQ-NEXT: vpackssdw %ymm1, %ymm0, %ymm2 1351; AVX512DQ-NEXT: vpacksswb %ymm2, %ymm2, %ymm2 1352; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm1 1353; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm0 1354; AVX512DQ-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 1355; AVX512DQ-NEXT: vpacksswb %ymm0, %ymm0, %ymm0 1356; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0 1357; AVX512DQ-NEXT: retq 1358; 1359; AVX512VBMI-LABEL: shuffle_v64i8_ashr_00_04_08_12_64_68_72_76_00_04_08_12_64_68_72_76_16_20_24_28_80_84_88_92_16_20_24_28_80_84_88_92_32_36_40_44_96_100_104_108_32_36_40_44_96_100_104_108_48_52_56_60_112_116_120_124_48_52_56_60_112_116_120_124: 1360; AVX512VBMI: # %bb.0: 1361; AVX512VBMI-NEXT: vpsrad $25, %zmm0, %zmm0 1362; AVX512VBMI-NEXT: vpsrad $25, %zmm1, %zmm1 1363; AVX512VBMI-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 1364; AVX512VBMI-NEXT: vpacksswb %zmm0, %zmm0, %zmm0 1365; AVX512VBMI-NEXT: retq 1366 %1 = ashr <16 x i32> %a0, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25> 1367 %2 = ashr <16 x i32> %a1, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25> 1368 %3 = bitcast <16 x i32> %1 to <64 x i8> 1369 %4 = bitcast <16 x i32> %2 to <64 x i8> 1370 %5 = shufflevector <64 x i8> %3, <64 x i8> %4, <64 x i32> <i32 0, i32 4, i32 8, i32 12, i32 64, i32 68, i32 72, i32 76, i32 0, i32 4, i32 8, i32 12, i32 64, i32 68, i32 72, i32 76, i32 16, i32 20, i32 24, i32 28, i32 80, i32 84, i32 88, i32 92, i32 16, i32 20, i32 24, i32 28, i32 80, i32 84, i32 88, i32 92, i32 32, i32 36, i32 40, i32 44, i32 96, i32 100, i32 104, i32 108, i32 32, i32 36, i32 40, i32 44, i32 96, i32 100, i32 104, i32 108, i32 48, i32 52, i32 56, i32 60, i32 112, i32 116, i32 120, i32 124, i32 48, i32 52, i32 56, i32 60, i32 112, i32 116, i32 120, i32 124> 1371 ret <64 x i8> %5 1372} 1373 1374define <64 x i8> @shuffle_v64i8_lshr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125(<16 x i32> %a0, <16 x i32> %a1) nounwind { 1375; AVX512F-LABEL: shuffle_v64i8_lshr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125: 1376; AVX512F: # %bb.0: 1377; AVX512F-NEXT: vpsrld $25, %zmm0, %zmm0 1378; AVX512F-NEXT: vpsrld $25, %zmm1, %zmm1 1379; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2 1380; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3 1381; AVX512F-NEXT: vpackusdw %ymm2, %ymm3, %ymm2 1382; AVX512F-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 1383; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 1384; AVX512F-NEXT: retq 1385; 1386; AVX512BW-LABEL: shuffle_v64i8_lshr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125: 1387; AVX512BW: # %bb.0: 1388; AVX512BW-NEXT: vpsrld $25, %zmm0, %zmm0 1389; AVX512BW-NEXT: vpsrld $25, %zmm1, %zmm1 1390; AVX512BW-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 1391; AVX512BW-NEXT: retq 1392; 1393; AVX512DQ-LABEL: shuffle_v64i8_lshr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125: 1394; AVX512DQ: # %bb.0: 1395; AVX512DQ-NEXT: vpsrld $25, %zmm0, %zmm0 1396; AVX512DQ-NEXT: vpsrld $25, %zmm1, %zmm1 1397; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm2 1398; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm3 1399; AVX512DQ-NEXT: vpackusdw %ymm2, %ymm3, %ymm2 1400; AVX512DQ-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 1401; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 1402; AVX512DQ-NEXT: retq 1403; 1404; AVX512VBMI-LABEL: shuffle_v64i8_lshr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125: 1405; AVX512VBMI: # %bb.0: 1406; AVX512VBMI-NEXT: vpsrld $25, %zmm0, %zmm0 1407; AVX512VBMI-NEXT: vpsrld $25, %zmm1, %zmm1 1408; AVX512VBMI-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 1409; AVX512VBMI-NEXT: retq 1410 %1 = lshr <16 x i32> %a0, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25> 1411 %2 = lshr <16 x i32> %a1, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25> 1412 %3 = bitcast <16 x i32> %1 to <64 x i8> 1413 %4 = bitcast <16 x i32> %2 to <64 x i8> 1414 %5 = shufflevector <64 x i8> %3, <64 x i8> %4, <64 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 12, i32 13, i32 64, i32 65, i32 68, i32 69, i32 72, i32 73, i32 76, i32 77, i32 16, i32 17, i32 20, i32 21, i32 24, i32 25, i32 28, i32 29, i32 80, i32 81, i32 84, i32 85, i32 88, i32 89, i32 92, i32 93, i32 32, i32 33, i32 36, i32 37, i32 40, i32 41, i32 44, i32 45, i32 96, i32 97, i32 100, i32 101, i32 104, i32 105, i32 108, i32 109, i32 48, i32 49, i32 52, i32 53, i32 56, i32 57, i32 60, i32 61, i32 112, i32 113, i32 116, i32 117, i32 120, i32 121, i32 124, i32 125> 1415 ret <64 x i8> %5 1416} 1417 1418define <64 x i8> @shuffle_v64i8_lshr_00_04_08_12_64_68_72_76_00_04_08_12_64_68_72_76_16_20_24_28_80_84_88_092_16_20_24_28_80_84_88_92_32_36_40_44_96_100_104_108_32_36_40_44_96_100_104_108_48_52_56_60_112_116_120_124_48_52_56_60_112_116_120_124(<16 x i32> %a0, <16 x i32> %a1) nounwind { 1419; AVX512F-LABEL: shuffle_v64i8_lshr_00_04_08_12_64_68_72_76_00_04_08_12_64_68_72_76_16_20_24_28_80_84_88_092_16_20_24_28_80_84_88_92_32_36_40_44_96_100_104_108_32_36_40_44_96_100_104_108_48_52_56_60_112_116_120_124_48_52_56_60_112_116_120_124: 1420; AVX512F: # %bb.0: 1421; AVX512F-NEXT: vpsrld $25, %zmm0, %zmm0 1422; AVX512F-NEXT: vpsrld $25, %zmm1, %zmm1 1423; AVX512F-NEXT: vpackusdw %ymm1, %ymm0, %ymm2 1424; AVX512F-NEXT: vpackuswb %ymm2, %ymm2, %ymm2 1425; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1 1426; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 1427; AVX512F-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 1428; AVX512F-NEXT: vpackuswb %ymm0, %ymm0, %ymm0 1429; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0 1430; AVX512F-NEXT: retq 1431; 1432; AVX512BW-LABEL: shuffle_v64i8_lshr_00_04_08_12_64_68_72_76_00_04_08_12_64_68_72_76_16_20_24_28_80_84_88_092_16_20_24_28_80_84_88_92_32_36_40_44_96_100_104_108_32_36_40_44_96_100_104_108_48_52_56_60_112_116_120_124_48_52_56_60_112_116_120_124: 1433; AVX512BW: # %bb.0: 1434; AVX512BW-NEXT: vpsrld $25, %zmm0, %zmm0 1435; AVX512BW-NEXT: vpsrld $25, %zmm1, %zmm1 1436; AVX512BW-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 1437; AVX512BW-NEXT: vpackuswb %zmm0, %zmm0, %zmm0 1438; AVX512BW-NEXT: retq 1439; 1440; AVX512DQ-LABEL: shuffle_v64i8_lshr_00_04_08_12_64_68_72_76_00_04_08_12_64_68_72_76_16_20_24_28_80_84_88_092_16_20_24_28_80_84_88_92_32_36_40_44_96_100_104_108_32_36_40_44_96_100_104_108_48_52_56_60_112_116_120_124_48_52_56_60_112_116_120_124: 1441; AVX512DQ: # %bb.0: 1442; AVX512DQ-NEXT: vpsrld $25, %zmm0, %zmm0 1443; AVX512DQ-NEXT: vpsrld $25, %zmm1, %zmm1 1444; AVX512DQ-NEXT: vpackusdw %ymm1, %ymm0, %ymm2 1445; AVX512DQ-NEXT: vpackuswb %ymm2, %ymm2, %ymm2 1446; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm1 1447; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm0 1448; AVX512DQ-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 1449; AVX512DQ-NEXT: vpackuswb %ymm0, %ymm0, %ymm0 1450; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0 1451; AVX512DQ-NEXT: retq 1452; 1453; AVX512VBMI-LABEL: shuffle_v64i8_lshr_00_04_08_12_64_68_72_76_00_04_08_12_64_68_72_76_16_20_24_28_80_84_88_092_16_20_24_28_80_84_88_92_32_36_40_44_96_100_104_108_32_36_40_44_96_100_104_108_48_52_56_60_112_116_120_124_48_52_56_60_112_116_120_124: 1454; AVX512VBMI: # %bb.0: 1455; AVX512VBMI-NEXT: vpsrld $25, %zmm0, %zmm0 1456; AVX512VBMI-NEXT: vpsrld $25, %zmm1, %zmm1 1457; AVX512VBMI-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 1458; AVX512VBMI-NEXT: vpackuswb %zmm0, %zmm0, %zmm0 1459; AVX512VBMI-NEXT: retq 1460 %1 = lshr <16 x i32> %a0, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25> 1461 %2 = lshr <16 x i32> %a1, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25> 1462 %3 = bitcast <16 x i32> %1 to <64 x i8> 1463 %4 = bitcast <16 x i32> %2 to <64 x i8> 1464 %5 = shufflevector <64 x i8> %3, <64 x i8> %4, <64 x i32> <i32 0, i32 4, i32 8, i32 12, i32 64, i32 68, i32 72, i32 76, i32 0, i32 4, i32 8, i32 12, i32 64, i32 68, i32 72, i32 76, i32 16, i32 20, i32 24, i32 28, i32 80, i32 84, i32 88, i32 92, i32 16, i32 20, i32 24, i32 28, i32 80, i32 84, i32 88, i32 92, i32 32, i32 36, i32 40, i32 44, i32 96, i32 100, i32 104, i32 108, i32 32, i32 36, i32 40, i32 44, i32 96, i32 100, i32 104, i32 108, i32 48, i32 52, i32 56, i32 60, i32 112, i32 116, i32 120, i32 124, i32 48, i32 52, i32 56, i32 60, i32 112, i32 116, i32 120, i32 124> 1465 ret <64 x i8> %5 1466} 1467 1468define <64 x i8> @shuffle_v64i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62_64_66_68_70_72_74_76_78_80_82_84_86_88_90_92_94_96_98_100_102_104_106_108_110_112_114_116_118_120_122_124_126(<32 x i16> %a0, <32 x i16> %a1) { 1469; AVX512F-LABEL: shuffle_v64i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62_64_66_68_70_72_74_76_78_80_82_84_86_88_90_92_94_96_98_100_102_104_106_108_110_112_114_116_118_120_122_124_126: 1470; AVX512F: # %bb.0: 1471; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2 1472; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2 1473; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 1474; AVX512F-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 1475; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2 1476; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2 1477; AVX512F-NEXT: vpsrlw $8, %ymm1, %ymm1 1478; AVX512F-NEXT: vpackuswb %ymm2, %ymm1, %ymm1 1479; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 1480; AVX512F-NEXT: vpermq {{.*#+}} zmm0 = zmm0[0,2,1,3,4,6,5,7] 1481; AVX512F-NEXT: retq 1482; 1483; AVX512BW-LABEL: shuffle_v64i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62_64_66_68_70_72_74_76_78_80_82_84_86_88_90_92_94_96_98_100_102_104_106_108_110_112_114_116_118_120_122_124_126: 1484; AVX512BW: # %bb.0: 1485; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0 1486; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1 1487; AVX512BW-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 1488; AVX512BW-NEXT: vpmovsxbq {{.*#+}} zmm1 = [0,2,4,6,1,3,5,7] 1489; AVX512BW-NEXT: vpermq %zmm0, %zmm1, %zmm0 1490; AVX512BW-NEXT: retq 1491; 1492; AVX512DQ-LABEL: shuffle_v64i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62_64_66_68_70_72_74_76_78_80_82_84_86_88_90_92_94_96_98_100_102_104_106_108_110_112_114_116_118_120_122_124_126: 1493; AVX512DQ: # %bb.0: 1494; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2 1495; AVX512DQ-NEXT: vpsrlw $8, %ymm2, %ymm2 1496; AVX512DQ-NEXT: vpsrlw $8, %ymm0, %ymm0 1497; AVX512DQ-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 1498; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm2 1499; AVX512DQ-NEXT: vpsrlw $8, %ymm2, %ymm2 1500; AVX512DQ-NEXT: vpsrlw $8, %ymm1, %ymm1 1501; AVX512DQ-NEXT: vpackuswb %ymm2, %ymm1, %ymm1 1502; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 1503; AVX512DQ-NEXT: vpermq {{.*#+}} zmm0 = zmm0[0,2,1,3,4,6,5,7] 1504; AVX512DQ-NEXT: retq 1505; 1506; AVX512VBMI-LABEL: shuffle_v64i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62_64_66_68_70_72_74_76_78_80_82_84_86_88_90_92_94_96_98_100_102_104_106_108_110_112_114_116_118_120_122_124_126: 1507; AVX512VBMI: # %bb.0: 1508; AVX512VBMI-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31,33,35,37,39,41,43,45,47,49,51,53,55,57,59,61,63,65,67,69,71,73,75,77,79,81,83,85,87,89,91,93,95,97,99,101,103,105,107,109,111,113,115,117,119,121,123,125,127] 1509; AVX512VBMI-NEXT: vpermt2b %zmm1, %zmm2, %zmm0 1510; AVX512VBMI-NEXT: retq 1511 %1 = lshr <32 x i16> %a0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> 1512 %2 = lshr <32 x i16> %a1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> 1513 %3 = bitcast <32 x i16> %1 to <64 x i8> 1514 %4 = bitcast <32 x i16> %2 to <64 x i8> 1515 %5 = shufflevector <64 x i8> %3, <64 x i8> %4, <64 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 32, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32 62, i32 64, i32 66, i32 68, i32 70, i32 72, i32 74, i32 76, i32 78, i32 80, i32 82, i32 84, i32 86, i32 88, i32 90, i32 92, i32 94, i32 96, i32 98, i32 100, i32 102, i32 104, i32 106, i32 108, i32 110, i32 112, i32 114, i32 116, i32 118, i32 120, i32 122, i32 124, i32 126> 1516 ret <64 x i8> %5 1517} 1518 1519define <64 x i8> @shuffle_v64i8_shift_00_02_04_06_08_10_12_14_64_66_68_70_72_74_76_78_16_18_20_22_24_26_28_30_80_82_84_86_88_90_92_94_32_34_36_38_40_42_44_46_96_98_100_102_104_106_108_110_48_50_52_54_56_58_60_62_112_114_116_118_120_122_124_126(<32 x i16> %a0, <32 x i16> %a1) { 1520; AVX512F-LABEL: shuffle_v64i8_shift_00_02_04_06_08_10_12_14_64_66_68_70_72_74_76_78_16_18_20_22_24_26_28_30_80_82_84_86_88_90_92_94_32_34_36_38_40_42_44_46_96_98_100_102_104_106_108_110_48_50_52_54_56_58_60_62_112_114_116_118_120_122_124_126: 1521; AVX512F: # %bb.0: 1522; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm2 1523; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 1524; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 1525; AVX512F-NEXT: vpsrlw $8, %ymm1, %ymm3 1526; AVX512F-NEXT: vpackuswb %ymm3, %ymm2, %ymm2 1527; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1 1528; AVX512F-NEXT: vpsrlw $8, %ymm1, %ymm1 1529; AVX512F-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 1530; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0 1531; AVX512F-NEXT: retq 1532; 1533; AVX512BW-LABEL: shuffle_v64i8_shift_00_02_04_06_08_10_12_14_64_66_68_70_72_74_76_78_16_18_20_22_24_26_28_30_80_82_84_86_88_90_92_94_32_34_36_38_40_42_44_46_96_98_100_102_104_106_108_110_48_50_52_54_56_58_60_62_112_114_116_118_120_122_124_126: 1534; AVX512BW: # %bb.0: 1535; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0 1536; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1 1537; AVX512BW-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 1538; AVX512BW-NEXT: retq 1539; 1540; AVX512DQ-LABEL: shuffle_v64i8_shift_00_02_04_06_08_10_12_14_64_66_68_70_72_74_76_78_16_18_20_22_24_26_28_30_80_82_84_86_88_90_92_94_32_34_36_38_40_42_44_46_96_98_100_102_104_106_108_110_48_50_52_54_56_58_60_62_112_114_116_118_120_122_124_126: 1541; AVX512DQ: # %bb.0: 1542; AVX512DQ-NEXT: vpsrlw $8, %ymm0, %ymm2 1543; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm0 1544; AVX512DQ-NEXT: vpsrlw $8, %ymm0, %ymm0 1545; AVX512DQ-NEXT: vpsrlw $8, %ymm1, %ymm3 1546; AVX512DQ-NEXT: vpackuswb %ymm3, %ymm2, %ymm2 1547; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm1 1548; AVX512DQ-NEXT: vpsrlw $8, %ymm1, %ymm1 1549; AVX512DQ-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 1550; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0 1551; AVX512DQ-NEXT: retq 1552; 1553; AVX512VBMI-LABEL: shuffle_v64i8_shift_00_02_04_06_08_10_12_14_64_66_68_70_72_74_76_78_16_18_20_22_24_26_28_30_80_82_84_86_88_90_92_94_32_34_36_38_40_42_44_46_96_98_100_102_104_106_108_110_48_50_52_54_56_58_60_62_112_114_116_118_120_122_124_126: 1554; AVX512VBMI: # %bb.0: 1555; AVX512VBMI-NEXT: vpsrlw $8, %zmm0, %zmm0 1556; AVX512VBMI-NEXT: vpsrlw $8, %zmm1, %zmm1 1557; AVX512VBMI-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 1558; AVX512VBMI-NEXT: retq 1559 %1 = lshr <32 x i16> %a0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> 1560 %2 = lshr <32 x i16> %a1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> 1561 %3 = bitcast <32 x i16> %1 to <64 x i8> 1562 %4 = bitcast <32 x i16> %2 to <64 x i8> 1563 %5 = shufflevector <64 x i8> %3, <64 x i8> %4, <64 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 64, i32 66, i32 68, i32 70, i32 72, i32 74, i32 76, i32 78, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 80, i32 82, i32 84, i32 86, i32 88, i32 90, i32 92, i32 94, i32 32, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46, i32 96, i32 98, i32 100, i32 102, i32 104, i32 106, i32 108, i32 110, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32 62, i32 112, i32 114, i32 116, i32 118, i32 120, i32 122, i32 124, i32 126> 1564 ret <64 x i8> %5 1565} 1566 1567; PR113396 1568define <64 x i8> @shuffle_v8i8_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01(<8 x i8> %0) { 1569; AVX512F-LABEL: shuffle_v8i8_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01: 1570; AVX512F: # %bb.0: 1571; AVX512F-NEXT: vpsrlw $8, %xmm0, %xmm0 1572; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0 1573; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 1574; AVX512F-NEXT: retq 1575; 1576; AVX512BW-LABEL: shuffle_v8i8_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01: 1577; AVX512BW: # %bb.0: 1578; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm0 1579; AVX512BW-NEXT: vpbroadcastb %xmm0, %zmm0 1580; AVX512BW-NEXT: retq 1581; 1582; AVX512DQ-LABEL: shuffle_v8i8_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01: 1583; AVX512DQ: # %bb.0: 1584; AVX512DQ-NEXT: vpsrlw $8, %xmm0, %xmm0 1585; AVX512DQ-NEXT: vpbroadcastb %xmm0, %ymm0 1586; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 1587; AVX512DQ-NEXT: retq 1588; 1589; AVX512VBMI-LABEL: shuffle_v8i8_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01_01: 1590; AVX512VBMI: # %bb.0: 1591; AVX512VBMI-NEXT: vpsrlw $8, %xmm0, %xmm0 1592; AVX512VBMI-NEXT: vpbroadcastb %xmm0, %zmm0 1593; AVX512VBMI-NEXT: retq 1594 %s = shufflevector <8 x i8> %0, <8 x i8> poison, <64 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 1595 ret <64 x i8> %s 1596} 1597 1598; PR114001 1599define <64 x i8> @shuffle_v8i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01_02_02_02_02_02_02_02_02_03_03_03_03_03_03_03_03_04_04_04_04_04_04_04_04_05_05_05_05_05_05_05_05_06_06_06_06_06_06_06_06_07_07_07_07_07_07_07_07(<8 x i8> %a0) { 1600; AVX512F-LABEL: shuffle_v8i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01_02_02_02_02_02_02_02_02_03_03_03_03_03_03_03_03_04_04_04_04_04_04_04_04_05_05_05_05_05_05_05_05_06_06_06_06_06_06_06_06_07_07_07_07_07_07_07_07: 1601; AVX512F: # %bb.0: 1602; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 1603; AVX512F-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 1604; AVX512F-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19] 1605; AVX512F-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,22,22,22,22,22,22,22,22,23,23,23,23,23,23,23,23] 1606; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 1607; AVX512F-NEXT: retq 1608; 1609; AVX512BW-LABEL: shuffle_v8i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01_02_02_02_02_02_02_02_02_03_03_03_03_03_03_03_03_04_04_04_04_04_04_04_04_05_05_05_05_05_05_05_05_06_06_06_06_06_06_06_06_07_07_07_07_07_07_07_07: 1610; AVX512BW: # %bb.0: 1611; AVX512BW-NEXT: vpbroadcastq %xmm0, %zmm0 1612; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19,36,36,36,36,36,36,36,36,37,37,37,37,37,37,37,37,54,54,54,54,54,54,54,54,55,55,55,55,55,55,55,55] 1613; AVX512BW-NEXT: retq 1614; 1615; AVX512DQ-LABEL: shuffle_v8i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01_02_02_02_02_02_02_02_02_03_03_03_03_03_03_03_03_04_04_04_04_04_04_04_04_05_05_05_05_05_05_05_05_06_06_06_06_06_06_06_06_07_07_07_07_07_07_07_07: 1616; AVX512DQ: # %bb.0: 1617; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 1618; AVX512DQ-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 1619; AVX512DQ-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19] 1620; AVX512DQ-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,22,22,22,22,22,22,22,22,23,23,23,23,23,23,23,23] 1621; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 1622; AVX512DQ-NEXT: retq 1623; 1624; AVX512VBMI-LABEL: shuffle_v8i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01_02_02_02_02_02_02_02_02_03_03_03_03_03_03_03_03_04_04_04_04_04_04_04_04_05_05_05_05_05_05_05_05_06_06_06_06_06_06_06_06_07_07_07_07_07_07_07_07: 1625; AVX512VBMI: # %bb.0: 1626; AVX512VBMI-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1627; AVX512VBMI-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7] 1628; AVX512VBMI-NEXT: vpermb %zmm0, %zmm1, %zmm0 1629; AVX512VBMI-NEXT: retq 1630 %s = shufflevector <8 x i8> %a0, <8 x i8> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> 1631 ret <64 x i8> %s 1632} 1633 1634define <64 x i8> @PR54562_ref(<64 x i8> %a0) { 1635; AVX512F-LABEL: PR54562_ref: 1636; AVX512F: # %bb.0: 1637; AVX512F-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,1,1,2] 1638; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [1,0,2,1,4,3,5,4,7,6,8,7,10,9,11,10,5,4,6,5,8,7,9,8,11,10,12,11,14,13,15,14] 1639; AVX512F-NEXT: vpshufb %ymm2, %ymm1, %ymm1 1640; AVX512F-NEXT: vextracti32x4 $2, %zmm0, %xmm3 1641; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 1642; AVX512F-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7] 1643; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 1644; AVX512F-NEXT: vpshufb %ymm2, %ymm0, %ymm0 1645; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 1646; AVX512F-NEXT: retq 1647; 1648; AVX512BW-LABEL: PR54562_ref: 1649; AVX512BW: # %bb.0: 1650; AVX512BW-NEXT: vpmovsxbq {{.*#+}} zmm1 = [0,1,1,2,3,4,4,5] 1651; AVX512BW-NEXT: vpermq %zmm0, %zmm1, %zmm0 1652; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[1,0,2,1,4,3,5,4,7,6,8,7,10,9,11,10,21,20,22,21,24,23,25,24,27,26,28,27,30,29,31,30,33,32,34,33,36,35,37,36,39,38,40,39,42,41,43,42,53,52,54,53,56,55,57,56,59,58,60,59,62,61,63,62] 1653; AVX512BW-NEXT: retq 1654; 1655; AVX512DQ-LABEL: PR54562_ref: 1656; AVX512DQ: # %bb.0: 1657; AVX512DQ-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,1,1,2] 1658; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm2 = [1,0,2,1,4,3,5,4,7,6,8,7,10,9,11,10,5,4,6,5,8,7,9,8,11,10,12,11,14,13,15,14] 1659; AVX512DQ-NEXT: vpshufb %ymm2, %ymm1, %ymm1 1660; AVX512DQ-NEXT: vextracti32x4 $2, %zmm0, %xmm3 1661; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm0 1662; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7] 1663; AVX512DQ-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 1664; AVX512DQ-NEXT: vpshufb %ymm2, %ymm0, %ymm0 1665; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 1666; AVX512DQ-NEXT: retq 1667; 1668; AVX512VBMI-LABEL: PR54562_ref: 1669; AVX512VBMI: # %bb.0: 1670; AVX512VBMI-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,2,1,4,3,5,4,7,6,8,7,10,9,11,10,13,12,14,13,16,15,17,16,19,18,20,19,22,21,23,22,25,24,26,25,28,27,29,28,31,30,32,31,34,33,35,34,37,36,38,37,40,39,41,40,43,42,44,43,46,45,47,46] 1671; AVX512VBMI-NEXT: vpermb %zmm0, %zmm1, %zmm0 1672; AVX512VBMI-NEXT: retq 1673 %shuffle1 = shufflevector <64 x i8> %a0, <64 x i8> poison, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 poison, i32 poison, i32 poison, i32 poison, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 poison, i32 poison, i32 poison, i32 poison, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 poison, i32 poison, i32 poison, i32 poison> 1674 %shuffle2 = shufflevector <64 x i8> %shuffle1, <64 x i8> poison, <64 x i32> <i32 1, i32 0, i32 2, i32 1, i32 4, i32 3, i32 5, i32 4, i32 7, i32 6, i32 8, i32 7, i32 10, i32 9, i32 11, i32 10, i32 17, i32 16, i32 18, i32 17, i32 20, i32 19, i32 21, i32 20, i32 23, i32 22, i32 24, i32 23, i32 26, i32 25, i32 27, i32 26, i32 33, i32 32, i32 34, i32 33, i32 36, i32 35, i32 37, i32 36, i32 39, i32 38, i32 40, i32 39, i32 42, i32 41, i32 43, i32 42, i32 49, i32 48, i32 50, i32 49, i32 52, i32 51, i32 53, i32 52, i32 55, i32 54, i32 56, i32 55, i32 58, i32 57, i32 59, i32 58> 1675 ret <64 x i8> %shuffle2 1676} 1677 1678define void @PR54562_mem(ptr %src, ptr %dst) { 1679; AVX512F-LABEL: PR54562_mem: 1680; AVX512F: # %bb.0: 1681; AVX512F-NEXT: vmovdqa 32(%rdi), %xmm0 1682; AVX512F-NEXT: vpalignr {{.*#+}} xmm1 = mem[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7] 1683; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 1684; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [1,0,2,1,4,3,5,4,7,6,8,7,10,9,11,10,5,4,6,5,8,7,9,8,11,10,12,11,14,13,15,14] 1685; AVX512F-NEXT: vpshufb %ymm1, %ymm0, %ymm0 1686; AVX512F-NEXT: vpermq {{.*#+}} ymm2 = mem[0,1,1,2] 1687; AVX512F-NEXT: vpshufb %ymm1, %ymm2, %ymm1 1688; AVX512F-NEXT: vmovdqa %ymm1, (%rsi) 1689; AVX512F-NEXT: vmovdqa %ymm0, 32(%rsi) 1690; AVX512F-NEXT: vzeroupper 1691; AVX512F-NEXT: retq 1692; 1693; AVX512BW-LABEL: PR54562_mem: 1694; AVX512BW: # %bb.0: 1695; AVX512BW-NEXT: vpmovsxbq {{.*#+}} zmm0 = [0,1,1,2,3,4,4,5] 1696; AVX512BW-NEXT: vpermq (%rdi), %zmm0, %zmm0 1697; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[1,0,2,1,4,3,5,4,7,6,8,7,10,9,11,10,21,20,22,21,24,23,25,24,27,26,28,27,30,29,31,30,33,32,34,33,36,35,37,36,39,38,40,39,42,41,43,42,53,52,54,53,56,55,57,56,59,58,60,59,62,61,63,62] 1698; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rsi) 1699; AVX512BW-NEXT: vzeroupper 1700; AVX512BW-NEXT: retq 1701; 1702; AVX512DQ-LABEL: PR54562_mem: 1703; AVX512DQ: # %bb.0: 1704; AVX512DQ-NEXT: vmovdqa 32(%rdi), %xmm0 1705; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm1 = mem[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7] 1706; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 1707; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm1 = [1,0,2,1,4,3,5,4,7,6,8,7,10,9,11,10,5,4,6,5,8,7,9,8,11,10,12,11,14,13,15,14] 1708; AVX512DQ-NEXT: vpshufb %ymm1, %ymm0, %ymm0 1709; AVX512DQ-NEXT: vpermq {{.*#+}} ymm2 = mem[0,1,1,2] 1710; AVX512DQ-NEXT: vpshufb %ymm1, %ymm2, %ymm1 1711; AVX512DQ-NEXT: vmovdqa %ymm1, (%rsi) 1712; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rsi) 1713; AVX512DQ-NEXT: vzeroupper 1714; AVX512DQ-NEXT: retq 1715; 1716; AVX512VBMI-LABEL: PR54562_mem: 1717; AVX512VBMI: # %bb.0: 1718; AVX512VBMI-NEXT: vmovdqa64 {{.*#+}} zmm0 = [1,0,2,1,4,3,5,4,7,6,8,7,10,9,11,10,13,12,14,13,16,15,17,16,19,18,20,19,22,21,23,22,25,24,26,25,28,27,29,28,31,30,32,31,34,33,35,34,37,36,38,37,40,39,41,40,43,42,44,43,46,45,47,46] 1719; AVX512VBMI-NEXT: vpermb (%rdi), %zmm0, %zmm0 1720; AVX512VBMI-NEXT: vmovdqa64 %zmm0, (%rsi) 1721; AVX512VBMI-NEXT: vzeroupper 1722; AVX512VBMI-NEXT: retq 1723 %load = load <64 x i8>, ptr %src, align 512 1724 %shuffle1 = shufflevector <64 x i8> %load, <64 x i8> poison, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 poison, i32 poison, i32 poison, i32 poison, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 poison, i32 poison, i32 poison, i32 poison, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 poison, i32 poison, i32 poison, i32 poison> 1725 %shuffle2 = shufflevector <64 x i8> %shuffle1, <64 x i8> poison, <64 x i32> <i32 1, i32 0, i32 2, i32 1, i32 4, i32 3, i32 5, i32 4, i32 7, i32 6, i32 8, i32 7, i32 10, i32 9, i32 11, i32 10, i32 17, i32 16, i32 18, i32 17, i32 20, i32 19, i32 21, i32 20, i32 23, i32 22, i32 24, i32 23, i32 26, i32 25, i32 27, i32 26, i32 33, i32 32, i32 34, i32 33, i32 36, i32 35, i32 37, i32 36, i32 39, i32 38, i32 40, i32 39, i32 42, i32 41, i32 43, i32 42, i32 49, i32 48, i32 50, i32 49, i32 52, i32 51, i32 53, i32 52, i32 55, i32 54, i32 56, i32 55, i32 58, i32 57, i32 59, i32 58> 1726 store <64 x i8> %shuffle2, ptr %dst, align 512 1727 ret void 1728} 1729 1730define <64 x i8> @shuffle_v32i16_zextinreg_to_v16i32(<64 x i8> %a) { 1731; ALL-LABEL: shuffle_v32i16_zextinreg_to_v16i32: 1732; ALL: # %bb.0: 1733; ALL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero 1734; ALL-NEXT: retq 1735 %b = shufflevector <64 x i8> %a, <64 x i8> <i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 0, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison 1736>, <64 x i32> <i32 0, i32 1, i32 84, i32 84, i32 2, i32 3, i32 84, i32 84, i32 4, i32 5, i32 84, i32 84, i32 6, i32 7, i32 84, i32 84, i32 8, i32 9, i32 84, i32 84, i32 10, i32 11, i32 84, i32 84, i32 12, i32 13, i32 84, i32 84, i32 14, i32 15, i32 84, i32 84, i32 16, i32 17, i32 84, i32 84, i32 18, i32 19, i32 84, i32 84, i32 20, i32 21, i32 84, i32 84, i32 22, i32 23, i32 84, i32 84, i32 24, i32 25, i32 84, i32 84, i32 26, i32 27, i32 84, i32 84, i32 28, i32 29, i32 84, i32 84, i32 30, i32 31, i32 84, i32 84> 1737 ret <64 x i8> %b 1738} 1739