; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=ALL,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=ALL,AVX2OR512VL,AVX2,AVX2-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX2OR512VL,AVX2,AVX2-FAST,AVX2-FAST-ALL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX2OR512VL,AVX2,AVX2-FAST,AVX2-FAST-PERLANE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw | FileCheck %s --check-prefixes=ALL,AVX2OR512VL,AVX512VL,AVX512VLBW,AVX512VLBW-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX2OR512VL,AVX512VL,AVX512VLBW,AVX512VLBW-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX2OR512VL,AVX512VL,AVX512VLBW,AVX512VLBW-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512vbmi | FileCheck %s --check-prefixes=ALL,AVX2OR512VL,AVX512VL,AVX512VLVBMI,AVX512VLVBMI-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512vbmi,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX2OR512VL,AVX512VL,AVX512VLVBMI,AVX512VLVBMI-FAST-ALL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512vbmi,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX2OR512VL,AVX512VL,AVX512VLVBMI,AVX512VLVBMI-FAST-PERLANE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefixes=ALL,XOP,XOPAVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=ALL,XOP,XOPAVX2

; Naming convention for the functions that follow - shuffle_v32i8_<i0>_<i1>_..._<i31>
; spells out the 32-entry shufflevector mask used in the body, two decimal
; digits per result byte. Only %a is referenced by these masks.

; Splat %a[0] into every result byte.
define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2OR512VL:       # %bb.0:
; AVX2OR512VL-NEXT:    vpbroadcastb %xmm0, %ymm0
; AVX2OR512VL-NEXT:    retq
;
; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; XOPAVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vpbroadcastb %xmm0, %ymm0
; XOPAVX2-NEXT:    retq
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <32 x i8> %shuffle
}

; Splat %a[0], except result byte 30 which reads %a[1].
define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00(<32 x i8> %a, <32 x i8> %b) {
; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX2-NEXT:    retq
;
; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX512VLBW-NEXT:    retq
;
; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
; AVX512VLVBMI-SLOW:       # %bb.0:
; AVX512VLVBMI-SLOW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
; AVX512VLVBMI-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX512VLVBMI-SLOW-NEXT:    retq
;
; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
; AVX512VLVBMI-FAST-ALL:       # %bb.0:
; AVX512VLVBMI-FAST-ALL-NEXT:    vpmovsxbw {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1]
; AVX512VLVBMI-FAST-ALL-NEXT:    vpermb %ymm0, %ymm1, %ymm0
; AVX512VLVBMI-FAST-ALL-NEXT:    retq
;
; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
; AVX512VLVBMI-FAST-PERLANE:       # %bb.0:
; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX512VLVBMI-FAST-PERLANE-NEXT:    retq
;
; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; XOPAVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; XOPAVX2-NEXT:    retq
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
  ret <32 x i8> %shuffle
}

; Splat %a[0], except result byte 29 which reads %a[2].
define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00(<32 x i8> %a, <32 x i8> %b) {
; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX2-NEXT:    retq
;
; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX512VLBW-NEXT:    retq
;
; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
; AVX512VLVBMI-SLOW:       # %bb.0:
; AVX512VLVBMI-SLOW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
; AVX512VLVBMI-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX512VLVBMI-SLOW-NEXT:    retq
;
; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
; AVX512VLVBMI-FAST-ALL:       # %bb.0:
; AVX512VLVBMI-FAST-ALL-NEXT:    vpmovsxwd {{.*#+}} ymm1 = [0,0,0,0,0,0,0,512]
; AVX512VLVBMI-FAST-ALL-NEXT:    vpermb %ymm0, %ymm1, %ymm0
; AVX512VLVBMI-FAST-ALL-NEXT:    retq
;
; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
; AVX512VLVBMI-FAST-PERLANE:       # %bb.0:
; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX512VLVBMI-FAST-PERLANE-NEXT:    retq
;
; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; XOPAVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; XOPAVX2-NEXT:    retq
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
  ret <32 x i8> %shuffle
}

; Splat %a[0], except result byte 28 which reads %a[3].
define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00(<32 x i8> %a, <32 x i8> %b) {
; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX2-NEXT:    retq
;
; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0]
; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX512VLBW-NEXT:    retq
;
; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
; AVX512VLVBMI-SLOW:       # %bb.0:
; AVX512VLVBMI-SLOW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0]
; AVX512VLVBMI-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX512VLVBMI-SLOW-NEXT:    retq
;
; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
; AVX512VLVBMI-FAST-ALL:       # %bb.0:
; AVX512VLVBMI-FAST-ALL-NEXT:    vpmovsxbd {{.*#+}} ymm1 = [0,0,0,0,0,0,0,3]
; AVX512VLVBMI-FAST-ALL-NEXT:    vpermb %ymm0, %ymm1, %ymm0
; AVX512VLVBMI-FAST-ALL-NEXT:    retq
;
; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
; AVX512VLVBMI-FAST-PERLANE:       # %bb.0:
; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0]
; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX512VLVBMI-FAST-PERLANE-NEXT:    retq
;
; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; XOPAVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0]
; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0]
; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; XOPAVX2-NEXT:    retq
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
  ret <32 x i8> %shuffle
}

; Splat %a[0], except result byte 27 which reads %a[4].
define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX2-NEXT:    retq
;
; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0]
; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX512VLBW-NEXT:    retq
;
; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
; AVX512VLVBMI-SLOW:       # %bb.0:
; AVX512VLVBMI-SLOW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0]
; AVX512VLVBMI-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX512VLVBMI-SLOW-NEXT:    retq
;
; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
; AVX512VLVBMI-FAST-ALL:       # %bb.0:
; AVX512VLVBMI-FAST-ALL-NEXT:    vpmovsxdq {{.*#+}} ymm1 = [0,0,0,67108864]
; AVX512VLVBMI-FAST-ALL-NEXT:    vpermb %ymm0, %ymm1, %ymm0
; AVX512VLVBMI-FAST-ALL-NEXT:    retq
;
; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
; AVX512VLVBMI-FAST-PERLANE:       # %bb.0:
; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0]
; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX512VLVBMI-FAST-PERLANE-NEXT:    retq
;
; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; XOPAVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0]
; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0]
; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; XOPAVX2-NEXT:    retq
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
  ret <32 x i8> %shuffle
}

; Splat %a[0], except result byte 26 which reads %a[5].
define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX2-NEXT:    retq
;
; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0]
; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX512VLBW-NEXT:    retq
;
; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
; AVX512VLVBMI-SLOW:       # %bb.0:
; AVX512VLVBMI-SLOW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0]
; AVX512VLVBMI-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX512VLVBMI-SLOW-NEXT:    retq
;
; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
; AVX512VLVBMI-FAST-ALL:       # %bb.0:
; AVX512VLVBMI-FAST-ALL-NEXT:    vpmovsxbw {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0]
; AVX512VLVBMI-FAST-ALL-NEXT:    vpermb %ymm0, %ymm1, %ymm0
; AVX512VLVBMI-FAST-ALL-NEXT:    retq
;
; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
; AVX512VLVBMI-FAST-PERLANE:       # %bb.0:
; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0]
; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX512VLVBMI-FAST-PERLANE-NEXT:    retq
;
; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; XOPAVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0]
; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0]
; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; XOPAVX2-NEXT:    retq
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <32 x i8> %shuffle
}

; Splat %a[0], except result byte 25 which reads %a[6].
define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX2-NEXT:    retq
;
; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0]
; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX512VLBW-NEXT:    retq
;
; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
; AVX512VLVBMI-SLOW:       # %bb.0:
; AVX512VLVBMI-SLOW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0]
; AVX512VLVBMI-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX512VLVBMI-SLOW-NEXT:    retq
;
; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
; AVX512VLVBMI-FAST-ALL:       # %bb.0:
; AVX512VLVBMI-FAST-ALL-NEXT:    vpmovsxwq {{.*#+}} ymm1 = [0,0,0,1536]
; AVX512VLVBMI-FAST-ALL-NEXT:    vpermb %ymm0, %ymm1, %ymm0
; AVX512VLVBMI-FAST-ALL-NEXT:    retq
;
; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
; AVX512VLVBMI-FAST-PERLANE:       # %bb.0:
; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0]
; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX512VLVBMI-FAST-PERLANE-NEXT:    retq
;
; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; XOPAVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0]
; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0]
; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; XOPAVX2-NEXT:    retq
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <32 x i8> %shuffle
}

; Splat %a[0], except result byte 24 which reads %a[7].
define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX2-NEXT:    retq
;
; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX512VLBW-NEXT:    retq
;
; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
; AVX512VLVBMI-SLOW:       # %bb.0:
; AVX512VLVBMI-SLOW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
; AVX512VLVBMI-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX512VLVBMI-SLOW-NEXT:    retq
;
; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
; AVX512VLVBMI-FAST-ALL:       # %bb.0:
; AVX512VLVBMI-FAST-ALL-NEXT:    vpmovsxbq {{.*#+}} ymm1 = [0,0,0,7]
; AVX512VLVBMI-FAST-ALL-NEXT:    vpermb %ymm0, %ymm1, %ymm0
; AVX512VLVBMI-FAST-ALL-NEXT:    retq
;
; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
; AVX512VLVBMI-FAST-PERLANE:       # %bb.0:
; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX512VLVBMI-FAST-PERLANE-NEXT:    retq
;
; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; XOPAVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; XOPAVX2-NEXT:    retq
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <32 x i8> %shuffle
}

; Splat %a[0], except result byte 23 which reads %a[8].
define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
; AVX2-NEXT:    retq
;
; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8]
; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
; AVX512VLBW-NEXT:    retq
;
; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
; AVX512VLVBMI-SLOW:       # %bb.0:
; AVX512VLVBMI-SLOW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8]
; AVX512VLVBMI-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
; AVX512VLVBMI-SLOW-NEXT:    retq
;
; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
; AVX512VLVBMI-FAST-ALL:       # %bb.0:
; AVX512VLVBMI-FAST-ALL-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
; AVX512VLVBMI-FAST-ALL-NEXT:    vpermb %ymm0, %ymm1, %ymm0
; AVX512VLVBMI-FAST-ALL-NEXT:    retq
;
; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
; AVX512VLVBMI-FAST-PERLANE:       # %bb.0:
; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8]
; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
; AVX512VLVBMI-FAST-PERLANE-NEXT:    retq
;
; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; XOPAVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8]
; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
; XOPAVX2-NEXT:    retq
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <32 x i8> %shuffle
}

; Splat %a[0], except result byte 22 which reads %a[9].
define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
; AVX2-NEXT:    retq
;
; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0]
; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
; AVX512VLBW-NEXT:    retq
;
; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
; AVX512VLVBMI-SLOW:       # %bb.0:
; AVX512VLVBMI-SLOW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0]
; AVX512VLVBMI-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
; AVX512VLVBMI-SLOW-NEXT:    retq
;
; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
; AVX512VLVBMI-FAST-ALL:       # %bb.0:
; AVX512VLVBMI-FAST-ALL-NEXT:    vpmovsxbw {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0]
; AVX512VLVBMI-FAST-ALL-NEXT:    vpermb %ymm0, %ymm1, %ymm0
; AVX512VLVBMI-FAST-ALL-NEXT:    retq
;
; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
; AVX512VLVBMI-FAST-PERLANE:       # %bb.0:
; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0]
; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
; AVX512VLVBMI-FAST-PERLANE-NEXT:    retq
;
; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; XOPAVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0]
; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0]
; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
; XOPAVX2-NEXT:    retq
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 9, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <32 x i8> %shuffle
}

; Splat %a[0], except result byte 21 which reads %a[10].
define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
; AVX2-NEXT:    retq
;
; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0]
; AVX512VLBW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
; AVX512VLBW-NEXT:    retq
;
; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
; AVX512VLVBMI-SLOW:       # %bb.0:
; AVX512VLVBMI-SLOW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0]
; AVX512VLVBMI-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
; AVX512VLVBMI-SLOW-NEXT:    retq
;
; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
; AVX512VLVBMI-FAST-ALL:       # %bb.0:
; AVX512VLVBMI-FAST-ALL-NEXT:    vpmovsxwd {{.*#+}} ymm1 = [0,0,0,0,0,2560,0,0]
; AVX512VLVBMI-FAST-ALL-NEXT:    vpermb %ymm0, %ymm1, %ymm0
; AVX512VLVBMI-FAST-ALL-NEXT:    retq
;
; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
; AVX512VLVBMI-FAST-PERLANE:       # %bb.0:
; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0]
; AVX512VLVBMI-FAST-PERLANE-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
; AVX512VLVBMI-FAST-PERLANE-NEXT:    retq
;
; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; XOPAVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
; XOPAVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0]
; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0]
; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
; XOPAVX2-NEXT:    retq
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret
<32 x i8> %shuffle 601} 602 603define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) { 604; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00: 605; AVX1: # %bb.0: 606; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 607; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1 608; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0] 609; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 610; AVX1-NEXT: retq 611; 612; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00: 613; AVX2: # %bb.0: 614; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0] 615; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0] 616; AVX2-NEXT: retq 617; 618; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00: 619; AVX512VLBW: # %bb.0: 620; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0] 621; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0] 622; AVX512VLBW-NEXT: retq 623; 624; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00: 625; AVX512VLVBMI-SLOW: # %bb.0: 626; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0] 627; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0] 628; AVX512VLVBMI-SLOW-NEXT: retq 629; 630; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00: 631; AVX512VLVBMI-FAST-ALL: # %bb.0: 632; AVX512VLVBMI-FAST-ALL-NEXT: vpmovsxbd {{.*#+}} ymm1 = [0,0,0,0,0,11,0,0] 633; AVX512VLVBMI-FAST-ALL-NEXT: vpermb %ymm0, %ymm1, %ymm0 634; AVX512VLVBMI-FAST-ALL-NEXT: retq 635; 636; AVX512VLVBMI-FAST-PERLANE-LABEL: 
shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00: 637; AVX512VLVBMI-FAST-PERLANE: # %bb.0: 638; AVX512VLVBMI-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0] 639; AVX512VLVBMI-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0] 640; AVX512VLVBMI-FAST-PERLANE-NEXT: retq 641; 642; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00: 643; XOPAVX1: # %bb.0: 644; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 645; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1 646; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0] 647; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 648; XOPAVX1-NEXT: retq 649; 650; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00: 651; XOPAVX2: # %bb.0: 652; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0] 653; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0] 654; XOPAVX2-NEXT: retq 655 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 11, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 656 ret <32 x i8> %shuffle 657} 658 659define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) { 660; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00: 661; AVX1: # %bb.0: 662; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 663; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1 664; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0] 665; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 666; AVX1-NEXT: retq 667; 668; AVX2-LABEL: 
shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00: 669; AVX2: # %bb.0: 670; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0] 671; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0] 672; AVX2-NEXT: retq 673; 674; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00: 675; AVX512VLBW: # %bb.0: 676; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0] 677; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0] 678; AVX512VLBW-NEXT: retq 679; 680; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00: 681; AVX512VLVBMI-SLOW: # %bb.0: 682; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0] 683; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0] 684; AVX512VLVBMI-SLOW-NEXT: retq 685; 686; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00: 687; AVX512VLVBMI-FAST-ALL: # %bb.0: 688; AVX512VLVBMI-FAST-ALL-NEXT: vpmovsxdq {{.*#+}} ymm1 = [0,0,201326592,0] 689; AVX512VLVBMI-FAST-ALL-NEXT: vpermb %ymm0, %ymm1, %ymm0 690; AVX512VLVBMI-FAST-ALL-NEXT: retq 691; 692; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00: 693; AVX512VLVBMI-FAST-PERLANE: # %bb.0: 694; AVX512VLVBMI-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0] 695; AVX512VLVBMI-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0] 696; AVX512VLVBMI-FAST-PERLANE-NEXT: retq 697; 698; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00: 699; XOPAVX1: # %bb.0: 700; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 701; XOPAVX1-NEXT: vpshufb 
%xmm1, %xmm0, %xmm1 702; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0] 703; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 704; XOPAVX1-NEXT: retq 705; 706; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00: 707; XOPAVX2: # %bb.0: 708; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0] 709; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0] 710; XOPAVX2-NEXT: retq 711 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 12, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 712 ret <32 x i8> %shuffle 713} 714 715define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) { 716; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00: 717; AVX1: # %bb.0: 718; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 719; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1 720; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0] 721; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 722; AVX1-NEXT: retq 723; 724; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00: 725; AVX2: # %bb.0: 726; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0] 727; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0] 728; AVX2-NEXT: retq 729; 730; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00: 731; AVX512VLBW: # %bb.0: 732; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0] 733; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0] 734; 
AVX512VLBW-NEXT: retq 735; 736; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00: 737; AVX512VLVBMI-SLOW: # %bb.0: 738; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0] 739; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0] 740; AVX512VLVBMI-SLOW-NEXT: retq 741; 742; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00: 743; AVX512VLVBMI-FAST-ALL: # %bb.0: 744; AVX512VLVBMI-FAST-ALL-NEXT: vpmovsxbw {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,13,0,0,0,0,0,0] 745; AVX512VLVBMI-FAST-ALL-NEXT: vpermb %ymm0, %ymm1, %ymm0 746; AVX512VLVBMI-FAST-ALL-NEXT: retq 747; 748; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00: 749; AVX512VLVBMI-FAST-PERLANE: # %bb.0: 750; AVX512VLVBMI-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0] 751; AVX512VLVBMI-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0] 752; AVX512VLVBMI-FAST-PERLANE-NEXT: retq 753; 754; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00: 755; XOPAVX1: # %bb.0: 756; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 757; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1 758; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0] 759; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 760; XOPAVX1-NEXT: retq 761; 762; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00: 763; XOPAVX2: # %bb.0: 764; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0] 765; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0] 766; XOPAVX2-NEXT: retq 767 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, 
i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 13, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 768 ret <32 x i8> %shuffle 769} 770 771define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) { 772; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 773; AVX1: # %bb.0: 774; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 775; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1 776; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0] 777; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 778; AVX1-NEXT: retq 779; 780; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 781; AVX2: # %bb.0: 782; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0] 783; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0] 784; AVX2-NEXT: retq 785; 786; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 787; AVX512VLBW: # %bb.0: 788; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0] 789; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0] 790; AVX512VLBW-NEXT: retq 791; 792; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 793; AVX512VLVBMI-SLOW: # %bb.0: 794; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0] 795; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0] 796; AVX512VLVBMI-SLOW-NEXT: retq 797; 798; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 799; AVX512VLVBMI-FAST-ALL: # %bb.0: 800; 
AVX512VLVBMI-FAST-ALL-NEXT: vpmovsxwq {{.*#+}} ymm1 = [0,0,3584,0] 801; AVX512VLVBMI-FAST-ALL-NEXT: vpermb %ymm0, %ymm1, %ymm0 802; AVX512VLVBMI-FAST-ALL-NEXT: retq 803; 804; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 805; AVX512VLVBMI-FAST-PERLANE: # %bb.0: 806; AVX512VLVBMI-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0] 807; AVX512VLVBMI-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0] 808; AVX512VLVBMI-FAST-PERLANE-NEXT: retq 809; 810; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 811; XOPAVX1: # %bb.0: 812; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 813; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1 814; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0] 815; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 816; XOPAVX1-NEXT: retq 817; 818; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 819; XOPAVX2: # %bb.0: 820; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0] 821; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0] 822; XOPAVX2-NEXT: retq 823 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 824 ret <32 x i8> %shuffle 825} 826 827define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) { 828; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 829; AVX1: # %bb.0: 830; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 831; 
AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1 832; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0] 833; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 834; AVX1-NEXT: retq 835; 836; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 837; AVX2: # %bb.0: 838; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0] 839; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0] 840; AVX2-NEXT: retq 841; 842; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 843; AVX512VLBW: # %bb.0: 844; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0] 845; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0] 846; AVX512VLBW-NEXT: retq 847; 848; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 849; AVX512VLVBMI-SLOW: # %bb.0: 850; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0] 851; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0] 852; AVX512VLVBMI-SLOW-NEXT: retq 853; 854; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 855; AVX512VLVBMI-FAST-ALL: # %bb.0: 856; AVX512VLVBMI-FAST-ALL-NEXT: vpmovsxbq {{.*#+}} ymm1 = [0,0,15,0] 857; AVX512VLVBMI-FAST-ALL-NEXT: vpermb %ymm0, %ymm1, %ymm0 858; AVX512VLVBMI-FAST-ALL-NEXT: retq 859; 860; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 861; AVX512VLVBMI-FAST-PERLANE: # %bb.0: 862; AVX512VLVBMI-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0] 863; AVX512VLVBMI-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0] 864; AVX512VLVBMI-FAST-PERLANE-NEXT: retq 865; 866; XOPAVX1-LABEL: 
shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 867; XOPAVX1: # %bb.0: 868; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 869; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1 870; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0] 871; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 872; XOPAVX1-NEXT: retq 873; 874; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 875; XOPAVX2: # %bb.0: 876; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0] 877; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0] 878; XOPAVX2-NEXT: retq 879 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 880 ret <32 x i8> %shuffle 881} 882 883define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) { 884; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 885; AVX1: # %bb.0: 886; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 887; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1 888; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 889; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 890; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1] 891; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 892; AVX1-NEXT: retq 893; 894; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 895; AVX2: # %bb.0: 896; AVX2-NEXT: vpermq {{.*#+}} ymm0 = 
ymm0[0,2,0,3] 897; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 898; AVX2-NEXT: retq 899; 900; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 901; AVX512VLBW: # %bb.0: 902; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] 903; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 904; AVX512VLBW-NEXT: retq 905; 906; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 907; AVX512VLVBMI: # %bb.0: 908; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16] 909; AVX512VLVBMI-NEXT: vpermb %ymm0, %ymm1, %ymm0 910; AVX512VLVBMI-NEXT: retq 911; 912; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 913; XOPAVX1: # %bb.0: 914; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 915; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1 916; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 917; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],xmm2[0] 918; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 919; XOPAVX1-NEXT: retq 920; 921; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 922; XOPAVX2: # %bb.0: 923; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] 924; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 925; XOPAVX2-NEXT: retq 926 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 
0> 927 ret <32 x i8> %shuffle 928} 929 930define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) { 931; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 932; AVX1: # %bb.0: 933; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 934; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1 935; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 936; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 937; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0] 938; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 939; AVX1-NEXT: retq 940; 941; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 942; AVX2: # %bb.0: 943; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] 944; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 945; AVX2-NEXT: retq 946; 947; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 948; AVX512VLBW: # %bb.0: 949; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] 950; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 951; AVX512VLBW-NEXT: retq 952; 953; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 954; AVX512VLVBMI: # %bb.0: 955; AVX512VLVBMI-NEXT: vpmovsxbw {{.*#+}} xmm1 = [0,0,0,0,0,0,0,17] 956; AVX512VLVBMI-NEXT: vpermb %ymm0, %ymm1, %ymm0 957; AVX512VLVBMI-NEXT: retq 958; 959; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 960; 
XOPAVX1: # %bb.0: 961; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 962; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1 963; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 964; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0],xmm2[1],xmm0[0] 965; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 966; XOPAVX1-NEXT: retq 967; 968; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 969; XOPAVX2: # %bb.0: 970; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] 971; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 972; XOPAVX2-NEXT: retq 973 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 17, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 974 ret <32 x i8> %shuffle 975} 976 977define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) { 978; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 979; AVX1: # %bb.0: 980; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 981; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1 982; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 983; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 984; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0] 985; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 986; AVX1-NEXT: retq 987; 988; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 989; AVX2: # %bb.0: 990; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] 
991; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 992; AVX2-NEXT: retq 993; 994; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 995; AVX512VLBW: # %bb.0: 996; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] 997; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 998; AVX512VLBW-NEXT: retq 999; 1000; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1001; AVX512VLVBMI: # %bb.0: 1002; AVX512VLVBMI-NEXT: vpmovsxwd {{.*#+}} xmm1 = [0,0,0,4608] 1003; AVX512VLVBMI-NEXT: vpermb %ymm0, %ymm1, %ymm0 1004; AVX512VLVBMI-NEXT: retq 1005; 1006; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1007; XOPAVX1: # %bb.0: 1008; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1009; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1 1010; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1011; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0],xmm2[2],xmm0[0,0] 1012; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1013; XOPAVX1-NEXT: retq 1014; 1015; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1016; XOPAVX2: # %bb.0: 1017; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] 1018; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 1019; XOPAVX2-NEXT: retq 1020 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 18, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 
1021 ret <32 x i8> %shuffle 1022} 1023 1024define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) { 1025; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1026; AVX1: # %bb.0: 1027; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1028; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1 1029; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1030; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 1031; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0] 1032; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1033; AVX1-NEXT: retq 1034; 1035; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1036; AVX2: # %bb.0: 1037; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] 1038; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 1039; AVX2-NEXT: retq 1040; 1041; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1042; AVX512VLBW: # %bb.0: 1043; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] 1044; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 1045; AVX512VLBW-NEXT: retq 1046; 1047; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1048; AVX512VLVBMI: # %bb.0: 1049; AVX512VLVBMI-NEXT: vpmovsxbd {{.*#+}} xmm1 = [0,0,0,19] 1050; AVX512VLVBMI-NEXT: vpermb %ymm0, %ymm1, %ymm0 1051; AVX512VLVBMI-NEXT: retq 1052; 1053; XOPAVX1-LABEL: 
shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1054; XOPAVX1: # %bb.0: 1055; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1056; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1 1057; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1058; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0],xmm2[3],xmm0[0,0,0] 1059; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1060; XOPAVX1-NEXT: retq 1061; 1062; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1063; XOPAVX2: # %bb.0: 1064; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] 1065; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 1066; XOPAVX2-NEXT: retq 1067 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 19, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1068 ret <32 x i8> %shuffle 1069} 1070 1071define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) { 1072; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1073; AVX1: # %bb.0: 1074; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1075; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1 1076; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1077; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 1078; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0] 1079; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1080; AVX1-NEXT: retq 1081; 1082; AVX2-LABEL: 
shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1083; AVX2: # %bb.0: 1084; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] 1085; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 1086; AVX2-NEXT: retq 1087; 1088; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1089; AVX512VLBW: # %bb.0: 1090; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] 1091; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 1092; AVX512VLBW-NEXT: retq 1093; 1094; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1095; AVX512VLVBMI: # %bb.0: 1096; AVX512VLVBMI-NEXT: vpmovsxdq {{.*#+}} xmm1 = [0,335544320] 1097; AVX512VLVBMI-NEXT: vpermb %ymm0, %ymm1, %ymm0 1098; AVX512VLVBMI-NEXT: retq 1099; 1100; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1101; XOPAVX1: # %bb.0: 1102; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1103; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1 1104; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1105; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0],xmm2[4],xmm0[0,0,0,0] 1106; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1107; XOPAVX1-NEXT: retq 1108; 1109; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1110; XOPAVX2: # %bb.0: 1111; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] 1112; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 1113; XOPAVX2-NEXT: retq 1114 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, 
i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 20, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1115 ret <32 x i8> %shuffle 1116} 1117 1118define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) { 1119; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1120; AVX1: # %bb.0: 1121; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1122; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1 1123; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1124; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 1125; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,11,0,0,0,0,0] 1126; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1127; AVX1-NEXT: retq 1128; 1129; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1130; AVX2: # %bb.0: 1131; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] 1132; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 1133; AVX2-NEXT: retq 1134; 1135; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1136; AVX512VLBW: # %bb.0: 1137; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] 1138; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 1139; AVX512VLBW-NEXT: retq 1140; 1141; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1142; AVX512VLVBMI: # %bb.0: 1143; AVX512VLVBMI-NEXT: vpmovsxbw {{.*#+}} xmm1 = 
[0,0,0,0,0,21,0,0] 1144; AVX512VLVBMI-NEXT: vpermb %ymm0, %ymm1, %ymm0 1145; AVX512VLVBMI-NEXT: retq 1146; 1147; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1148; XOPAVX1: # %bb.0: 1149; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1150; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1 1151; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1152; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0],xmm2[5],xmm0[0,0,0,0,0] 1153; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1154; XOPAVX1-NEXT: retq 1155; 1156; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1157; XOPAVX2: # %bb.0: 1158; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] 1159; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 1160; XOPAVX2-NEXT: retq 1161 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 21, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1162 ret <32 x i8> %shuffle 1163} 1164 1165define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) { 1166; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1167; AVX1: # %bb.0: 1168; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1169; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1 1170; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1171; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 1172; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,13,0,0,0,0,0,0] 1173; 
AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1174; AVX1-NEXT: retq 1175; 1176; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1177; AVX2: # %bb.0: 1178; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] 1179; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 1180; AVX2-NEXT: retq 1181; 1182; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1183; AVX512VLBW: # %bb.0: 1184; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] 1185; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 1186; AVX512VLBW-NEXT: retq 1187; 1188; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1189; AVX512VLVBMI: # %bb.0: 1190; AVX512VLVBMI-NEXT: vpmovsxwq {{.*#+}} xmm1 = [0,5632] 1191; AVX512VLVBMI-NEXT: vpermb %ymm0, %ymm1, %ymm0 1192; AVX512VLVBMI-NEXT: retq 1193; 1194; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1195; XOPAVX1: # %bb.0: 1196; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1197; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1 1198; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1199; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0],xmm2[6],xmm0[0,0,0,0,0,0] 1200; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1201; XOPAVX1-NEXT: retq 1202; 1203; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1204; XOPAVX2: # %bb.0: 1205; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] 1206; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 1207; XOPAVX2-NEXT: retq 1208 
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 22, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1209 ret <32 x i8> %shuffle 1210} 1211 1212define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) { 1213; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1214; AVX1: # %bb.0: 1215; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1216; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1 1217; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1218; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 1219; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0] 1220; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1221; AVX1-NEXT: retq 1222; 1223; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1224; AVX2: # %bb.0: 1225; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] 1226; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 1227; AVX2-NEXT: retq 1228; 1229; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1230; AVX512VLBW: # %bb.0: 1231; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] 1232; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 1233; AVX512VLBW-NEXT: retq 1234; 1235; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 
1236; AVX512VLVBMI: # %bb.0: 1237; AVX512VLVBMI-NEXT: vpmovsxbq {{.*#+}} xmm1 = [0,23] 1238; AVX512VLVBMI-NEXT: vpermb %ymm0, %ymm1, %ymm0 1239; AVX512VLVBMI-NEXT: retq 1240; 1241; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1242; XOPAVX1: # %bb.0: 1243; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1244; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1 1245; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1246; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0],xmm2[7],xmm0[0,0,0,0,0,0,0] 1247; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1248; XOPAVX1-NEXT: retq 1249; 1250; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1251; XOPAVX2: # %bb.0: 1252; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3] 1253; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 1254; XOPAVX2-NEXT: retq 1255 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 23, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1256 ret <32 x i8> %shuffle 1257} 1258 1259define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) { 1260; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1261; AVX1: # %bb.0: 1262; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1263; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1 1264; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1265; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7] 1266; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,8,8,8,8,8,8,0,8,8,8,8,8,8,8,8] 1267; AVX1-NEXT: 
vinsertf128 $1, %xmm1, %ymm0, %ymm0 1268; AVX1-NEXT: retq 1269; 1270; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1271; AVX2: # %bb.0: 1272; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] 1273; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 1274; AVX2-NEXT: retq 1275; 1276; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1277; AVX512VLBW: # %bb.0: 1278; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] 1279; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 1280; AVX512VLBW-NEXT: retq 1281; 1282; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1283; AVX512VLVBMI: # %bb.0: 1284; AVX512VLVBMI-NEXT: vmovq {{.*#+}} xmm1 = [0,0,0,0,0,0,0,24,0,0,0,0,0,0,0,0] 1285; AVX512VLVBMI-NEXT: vpermb %ymm0, %ymm1, %ymm0 1286; AVX512VLVBMI-NEXT: retq 1287; 1288; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1289; XOPAVX1: # %bb.0: 1290; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1291; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1 1292; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1293; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0],xmm2[8],xmm0[0,0,0,0,0,0,0,0] 1294; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1295; XOPAVX1-NEXT: retq 1296; 1297; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1298; XOPAVX2: # %bb.0: 1299; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] 1300; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 1301; XOPAVX2-NEXT: retq 
1302 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1303 ret <32 x i8> %shuffle 1304} 1305 1306define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) { 1307; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1308; AVX1: # %bb.0: 1309; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1310; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1 1311; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1312; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm2[9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8] 1313; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[7,7,7,7,7,7,0,7,7,7,7,7,7,7,7,7] 1314; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1315; AVX1-NEXT: retq 1316; 1317; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1318; AVX2: # %bb.0: 1319; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] 1320; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 1321; AVX2-NEXT: retq 1322; 1323; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1324; AVX512VLBW: # %bb.0: 1325; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] 1326; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 1327; AVX512VLBW-NEXT: retq 1328; 1329; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1330; AVX512VLVBMI: # %bb.0: 1331; AVX512VLVBMI-NEXT: vmovq {{.*#+}} xmm1 = 
[0,0,0,0,0,0,25,0,0,0,0,0,0,0,0,0] 1332; AVX512VLVBMI-NEXT: vpermb %ymm0, %ymm1, %ymm0 1333; AVX512VLVBMI-NEXT: retq 1334; 1335; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1336; XOPAVX1: # %bb.0: 1337; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1338; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1 1339; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1340; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0],xmm2[9],xmm0[0,0,0,0,0,0,0,0,0] 1341; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1342; XOPAVX1-NEXT: retq 1343; 1344; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1345; XOPAVX2: # %bb.0: 1346; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] 1347; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 1348; XOPAVX2-NEXT: retq 1349 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 25, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1350 ret <32 x i8> %shuffle 1351} 1352 1353define <32 x i8> @shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) { 1354; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1355; AVX1: # %bb.0: 1356; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1357; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1 1358; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1359; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm2[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9] 1360; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,6,6,6,6,0,6,6,6,6,6,6,6,6,6,6] 1361; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1362; AVX1-NEXT: retq 
1363; 1364; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1365; AVX2: # %bb.0: 1366; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] 1367; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 1368; AVX2-NEXT: retq 1369; 1370; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1371; AVX512VLBW: # %bb.0: 1372; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] 1373; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 1374; AVX512VLBW-NEXT: retq 1375; 1376; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1377; AVX512VLVBMI: # %bb.0: 1378; AVX512VLVBMI-NEXT: vmovq {{.*#+}} xmm1 = [0,0,0,0,0,26,0,0,0,0,0,0,0,0,0,0] 1379; AVX512VLVBMI-NEXT: vpermb %ymm0, %ymm1, %ymm0 1380; AVX512VLVBMI-NEXT: retq 1381; 1382; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1383; XOPAVX1: # %bb.0: 1384; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1385; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1 1386; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1387; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,0,0,0,0],xmm2[10],xmm0[0,0,0,0,0,0,0,0,0,0] 1388; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1389; XOPAVX1-NEXT: retq 1390; 1391; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1392; XOPAVX2: # %bb.0: 1393; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] 1394; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 1395; XOPAVX2-NEXT: retq 1396 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, 
<32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 26, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1397 ret <32 x i8> %shuffle 1398} 1399 1400define <32 x i8> @shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) { 1401; AVX1-LABEL: shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1402; AVX1: # %bb.0: 1403; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1404; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1 1405; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1406; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm2[11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10] 1407; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,0,5,5,5,5,5,5,5,5,5,5,5] 1408; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1409; AVX1-NEXT: retq 1410; 1411; AVX2-LABEL: shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1412; AVX2: # %bb.0: 1413; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] 1414; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 1415; AVX2-NEXT: retq 1416; 1417; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1418; AVX512VLBW: # %bb.0: 1419; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] 1420; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 1421; AVX512VLBW-NEXT: retq 1422; 1423; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1424; AVX512VLVBMI: # %bb.0: 1425; AVX512VLVBMI-NEXT: vpmovsxbd {{.*#+}} xmm1 = [0,27,0,0] 1426; AVX512VLVBMI-NEXT: vpermb %ymm0, %ymm1, 
%ymm0 1427; AVX512VLVBMI-NEXT: retq 1428; 1429; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1430; XOPAVX1: # %bb.0: 1431; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1432; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1 1433; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1434; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,0,0,0],xmm2[11],xmm0[0,0,0,0,0,0,0,0,0,0,0] 1435; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1436; XOPAVX1-NEXT: retq 1437; 1438; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1439; XOPAVX2: # %bb.0: 1440; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] 1441; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 1442; XOPAVX2-NEXT: retq 1443 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 27, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1444 ret <32 x i8> %shuffle 1445} 1446 1447define <32 x i8> @shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) { 1448; AVX1-LABEL: shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1449; AVX1: # %bb.0: 1450; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1451; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1 1452; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1453; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm2[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11] 1454; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,4,4,0,4,4,4,4,4,4,4,4,4,4,4,4] 1455; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1456; AVX1-NEXT: retq 1457; 1458; AVX2-LABEL: 
shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1459; AVX2: # %bb.0: 1460; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] 1461; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 1462; AVX2-NEXT: retq 1463; 1464; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1465; AVX512VLBW: # %bb.0: 1466; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] 1467; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 1468; AVX512VLBW-NEXT: retq 1469; 1470; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1471; AVX512VLVBMI: # %bb.0: 1472; AVX512VLVBMI-NEXT: vmovd {{.*#+}} xmm1 = [0,0,0,28,0,0,0,0,0,0,0,0,0,0,0,0] 1473; AVX512VLVBMI-NEXT: vpermb %ymm0, %ymm1, %ymm0 1474; AVX512VLVBMI-NEXT: retq 1475; 1476; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1477; XOPAVX1: # %bb.0: 1478; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1479; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1 1480; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1481; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,0,0],xmm2[12],xmm0[0,0,0,0,0,0,0,0,0,0,0,0] 1482; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1483; XOPAVX1-NEXT: retq 1484; 1485; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1486; XOPAVX2: # %bb.0: 1487; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] 1488; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 1489; XOPAVX2-NEXT: retq 1490 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 
0, i32 0, i32 28, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1491 ret <32 x i8> %shuffle 1492} 1493 1494define <32 x i8> @shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) { 1495; AVX1-LABEL: shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1496; AVX1: # %bb.0: 1497; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1498; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1 1499; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1500; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm2[13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12] 1501; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,3,0,3,3,3,3,3,3,3,3,3,3,3,3,3] 1502; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1503; AVX1-NEXT: retq 1504; 1505; AVX2-LABEL: shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1506; AVX2: # %bb.0: 1507; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] 1508; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 1509; AVX2-NEXT: retq 1510; 1511; AVX512VLBW-LABEL: shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1512; AVX512VLBW: # %bb.0: 1513; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] 1514; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 1515; AVX512VLBW-NEXT: retq 1516; 1517; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1518; AVX512VLVBMI: # %bb.0: 1519; AVX512VLVBMI-NEXT: vmovd {{.*#+}} xmm1 = [0,0,29,0,0,0,0,0,0,0,0,0,0,0,0,0] 1520; AVX512VLVBMI-NEXT: vpermb %ymm0, %ymm1, 
%ymm0 1521; AVX512VLVBMI-NEXT: retq 1522; 1523; XOPAVX1-LABEL: shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1524; XOPAVX1: # %bb.0: 1525; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1526; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1 1527; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1528; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,0],xmm2[13],xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0] 1529; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1530; XOPAVX1-NEXT: retq 1531; 1532; XOPAVX2-LABEL: shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1533; XOPAVX2: # %bb.0: 1534; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] 1535; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 1536; XOPAVX2-NEXT: retq 1537 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 29, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1538 ret <32 x i8> %shuffle 1539} 1540 1541define <32 x i8> @shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) { 1542; AVX1-LABEL: shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1543; AVX1: # %bb.0: 1544; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1545; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1 1546; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1547; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm2[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] 1548; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2] 1549; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1550; AVX1-NEXT: retq 1551; 1552; AVX2-LABEL: 
shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1553; AVX2: # %bb.0: 1554; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] 1555; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 1556; AVX2-NEXT: retq 1557; 1558; AVX512VLBW-LABEL: shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1559; AVX512VLBW: # %bb.0: 1560; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] 1561; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 1562; AVX512VLBW-NEXT: retq 1563; 1564; AVX512VLVBMI-LABEL: shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1565; AVX512VLVBMI: # %bb.0: 1566; AVX512VLVBMI-NEXT: vmovd {{.*#+}} xmm1 = [0,30,0,0,0,0,0,0,0,0,0,0,0,0,0,0] 1567; AVX512VLVBMI-NEXT: vpermb %ymm0, %ymm1, %ymm0 1568; AVX512VLVBMI-NEXT: retq 1569; 1570; XOPAVX1-LABEL: shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1571; XOPAVX1: # %bb.0: 1572; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1573; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1 1574; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1575; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0],xmm2[14],xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0] 1576; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1577; XOPAVX1-NEXT: retq 1578; 1579; XOPAVX2-LABEL: shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1580; XOPAVX2: # %bb.0: 1581; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3] 1582; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 1583; XOPAVX2-NEXT: retq 1584 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 
30, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1585 ret <32 x i8> %shuffle 1586} 1587 1588define <32 x i8> @shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) { 1589; AVX1-LABEL: shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1590; AVX1: # %bb.0: 1591; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1592; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1 1593; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1594; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm2[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 1595; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] 1596; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1597; AVX1-NEXT: retq 1598; 1599; AVX2-LABEL: shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1600; AVX2: # %bb.0: 1601; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,0,0,3] 1602; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 1603; AVX2-NEXT: retq 1604; 1605; AVX512VLBW-LABEL: shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1606; AVX512VLBW: # %bb.0: 1607; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,0,0,3] 1608; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 1609; AVX512VLBW-NEXT: retq 1610; 1611; AVX512VLVBMI-LABEL: shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1612; AVX512VLVBMI: # %bb.0: 1613; AVX512VLVBMI-NEXT: vpmovsxbq {{.*#+}} xmm1 = [31,0] 1614; AVX512VLVBMI-NEXT: vpermb %ymm0, %ymm1, %ymm0 1615; 
AVX512VLVBMI-NEXT: retq 1616; 1617; XOPAVX1-LABEL: shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1618; XOPAVX1: # %bb.0: 1619; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1620; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1 1621; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1622; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm2[15],xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0] 1623; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1624; XOPAVX1-NEXT: retq 1625; 1626; XOPAVX2-LABEL: shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 1627; XOPAVX2: # %bb.0: 1628; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,0,0,3] 1629; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 1630; XOPAVX2-NEXT: retq 1631 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 31, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1632 ret <32 x i8> %shuffle 1633} 1634 1635define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) { 1636; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16: 1637; AVX1: # %bb.0: 1638; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1639; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 1640; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 1641; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 1642; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1643; AVX1-NEXT: retq 1644; 1645; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16: 1646; AVX2OR512VL: # %bb.0: 1647; AVX2OR512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 1648; 
AVX2OR512VL-NEXT: vpshufb %ymm1, %ymm0, %ymm0 1649; AVX2OR512VL-NEXT: retq 1650; 1651; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16: 1652; XOPAVX1: # %bb.0: 1653; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1654; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 1655; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 1656; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 1657; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1658; XOPAVX1-NEXT: retq 1659; 1660; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16: 1661; XOPAVX2: # %bb.0: 1662; XOPAVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 1663; XOPAVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm0 1664; XOPAVX2-NEXT: retq 1665 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16> 1666 ret <32 x i8> %shuffle 1667} 1668 1669define <32 x i8> @shuffle_v32i8_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31(<32 x i8> %a, <32 x i8> %b) { 1670; AVX1-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31: 1671; AVX1: # %bb.0: 1672; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1673; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 1674; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 1675; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 1676; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1677; AVX1-NEXT: retq 1678; 1679; AVX2OR512VL-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31: 1680; AVX2OR512VL: # %bb.0: 1681; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31] 1682; AVX2OR512VL-NEXT: retq 1683; 1684; XOPAVX1-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31: 1685; XOPAVX1: # %bb.0: 1686; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1687; XOPAVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 1688; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 1689; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 1690; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1691; XOPAVX1-NEXT: retq 1692; 1693; XOPAVX2-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31: 1694; XOPAVX2: # %bb.0: 1695; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31] 1696; XOPAVX2-NEXT: retq 1697 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> 1698 ret <32 x i8> %shuffle 1699} 1700 1701define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24(<32 x i8> %a, <32 x i8> %b) { 1702; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24: 1703; AVX1: # %bb.0: 1704; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1705; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8] 1706; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 1707; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 1708; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1709; AVX1-NEXT: retq 1710; 1711; AVX2OR512VL-LABEL: 
shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24: 1712; AVX2OR512VL: # %bb.0: 1713; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24] 1714; AVX2OR512VL-NEXT: retq 1715; 1716; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24: 1717; XOPAVX1: # %bb.0: 1718; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1719; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8] 1720; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 1721; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 1722; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1723; XOPAVX1-NEXT: retq 1724; 1725; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24: 1726; XOPAVX2: # %bb.0: 1727; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24] 1728; XOPAVX2-NEXT: retq 1729 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24> 1730 ret <32 x i8> %shuffle 1731} 1732 1733define <32 x i8> @shuffle_v32i8_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15_23_23_23_23_23_23_23_23_31_31_31_31_31_31_31_31(<32 x i8> %a, <32 x i8> %b) { 1734; AVX1-LABEL: shuffle_v32i8_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15_23_23_23_23_23_23_23_23_31_31_31_31_31_31_31_31: 1735; AVX1: # %bb.0: 1736; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1737; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,15,15,15,15,15,15,15,15] 1738; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 1739; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 1740; AVX1-NEXT: vinsertf128 $1, 
%xmm1, %ymm0, %ymm0 1741; AVX1-NEXT: retq 1742; 1743; AVX2OR512VL-LABEL: shuffle_v32i8_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15_23_23_23_23_23_23_23_23_31_31_31_31_31_31_31_31: 1744; AVX2OR512VL: # %bb.0: 1745; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,7,7,7,7,7,7,7,15,15,15,15,15,15,15,15,23,23,23,23,23,23,23,23,31,31,31,31,31,31,31,31] 1746; AVX2OR512VL-NEXT: retq 1747; 1748; XOPAVX1-LABEL: shuffle_v32i8_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15_23_23_23_23_23_23_23_23_31_31_31_31_31_31_31_31: 1749; XOPAVX1: # %bb.0: 1750; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1751; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,15,15,15,15,15,15,15,15] 1752; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 1753; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 1754; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1755; XOPAVX1-NEXT: retq 1756; 1757; XOPAVX2-LABEL: shuffle_v32i8_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15_23_23_23_23_23_23_23_23_31_31_31_31_31_31_31_31: 1758; XOPAVX2: # %bb.0: 1759; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,7,7,7,7,7,7,7,15,15,15,15,15,15,15,15,23,23,23,23,23,23,23,23,31,31,31,31,31,31,31,31] 1760; XOPAVX2-NEXT: retq 1761 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> 1762 ret <32 x i8> %shuffle 1763} 1764 1765define <32 x i8> @shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20_24_24_24_24_28_28_28_28(<32 x i8> %a, <32 x i8> %b) { 1766; AVX1-LABEL: shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20_24_24_24_24_28_28_28_28: 1767; AVX1: # %bb.0: 1768; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1769; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12] 1770; AVX1-NEXT: 
vpshufb %xmm2, %xmm1, %xmm1 1771; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 1772; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1773; AVX1-NEXT: retq 1774; 1775; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20_24_24_24_24_28_28_28_28: 1776; AVX2OR512VL: # %bb.0: 1777; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12,16,16,16,16,20,20,20,20,24,24,24,24,28,28,28,28] 1778; AVX2OR512VL-NEXT: retq 1779; 1780; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20_24_24_24_24_28_28_28_28: 1781; XOPAVX1: # %bb.0: 1782; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1783; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12] 1784; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 1785; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 1786; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1787; XOPAVX1-NEXT: retq 1788; 1789; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20_24_24_24_24_28_28_28_28: 1790; XOPAVX2: # %bb.0: 1791; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12,16,16,16,16,20,20,20,20,24,24,24,24,28,28,28,28] 1792; XOPAVX2-NEXT: retq 1793 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 16, i32 16, i32 16, i32 16, i32 20, i32 20, i32 20, i32 20, i32 24, i32 24, i32 24, i32 24, i32 28, i32 28, i32 28, i32 28> 1794 ret <32 x i8> %shuffle 1795} 1796 1797define <32 x i8> @shuffle_v32i8_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15_19_19_19_19_23_23_23_23_27_27_27_27_31_31_31_31(<32 x i8> %a, <32 x i8> %b) { 1798; AVX1-LABEL: shuffle_v32i8_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15_19_19_19_19_23_23_23_23_27_27_27_27_31_31_31_31: 1799; AVX1: # %bb.0: 1800; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1801; 
AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [3,3,3,3,7,7,7,7,11,11,11,11,15,15,15,15] 1802; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 1803; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 1804; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1805; AVX1-NEXT: retq 1806; 1807; AVX2OR512VL-LABEL: shuffle_v32i8_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15_19_19_19_19_23_23_23_23_27_27_27_27_31_31_31_31: 1808; AVX2OR512VL: # %bb.0: 1809; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[3,3,3,3,7,7,7,7,11,11,11,11,15,15,15,15,19,19,19,19,23,23,23,23,27,27,27,27,31,31,31,31] 1810; AVX2OR512VL-NEXT: retq 1811; 1812; XOPAVX1-LABEL: shuffle_v32i8_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15_19_19_19_19_23_23_23_23_27_27_27_27_31_31_31_31: 1813; XOPAVX1: # %bb.0: 1814; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1815; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [3,3,3,3,7,7,7,7,11,11,11,11,15,15,15,15] 1816; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 1817; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 1818; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1819; XOPAVX1-NEXT: retq 1820; 1821; XOPAVX2-LABEL: shuffle_v32i8_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15_19_19_19_19_23_23_23_23_27_27_27_27_31_31_31_31: 1822; XOPAVX2: # %bb.0: 1823; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[3,3,3,3,7,7,7,7,11,11,11,11,15,15,15,15,19,19,19,19,23,23,23,23,27,27,27,27,31,31,31,31] 1824; XOPAVX2-NEXT: retq 1825 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 7, i32 7, i32 7, i32 7, i32 11, i32 11, i32 11, i32 11, i32 15, i32 15, i32 15, i32 15, i32 19, i32 19, i32 19, i32 19, i32 23, i32 23, i32 23, i32 23, i32 27, i32 27, i32 27, i32 27, i32 31, i32 31, i32 31, i32 31> 1826 ret <32 x i8> %shuffle 1827} 1828 1829define <32 x i8> @shuffle_v32i8_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14_16_16_18_18_20_20_22_22_24_24_26_26_28_28_30_30(<32 x i8> %a, <32 x i8> %b) { 1830; AVX1-LABEL: 
shuffle_v32i8_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14_16_16_18_18_20_20_22_22_24_24_26_26_28_28_30_30: 1831; AVX1: # %bb.0: 1832; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1833; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] 1834; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 1835; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 1836; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1837; AVX1-NEXT: retq 1838; 1839; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14_16_16_18_18_20_20_22_22_24_24_26_26_28_28_30_30: 1840; AVX2OR512VL: # %bb.0: 1841; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14,16,16,18,18,20,20,22,22,24,24,26,26,28,28,30,30] 1842; AVX2OR512VL-NEXT: retq 1843; 1844; XOPAVX1-LABEL: shuffle_v32i8_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14_16_16_18_18_20_20_22_22_24_24_26_26_28_28_30_30: 1845; XOPAVX1: # %bb.0: 1846; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1847; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] 1848; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 1849; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 1850; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1851; XOPAVX1-NEXT: retq 1852; 1853; XOPAVX2-LABEL: shuffle_v32i8_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14_16_16_18_18_20_20_22_22_24_24_26_26_28_28_30_30: 1854; XOPAVX2: # %bb.0: 1855; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14,16,16,18,18,20,20,22,22,24,24,26,26,28,28,30,30] 1856; XOPAVX2-NEXT: retq 1857 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14, i32 16, i32 16, i32 18, i32 18, i32 20, i32 20, i32 22, i32 22, i32 24, i32 24, i32 26, i32 26, i32 28, i32 28, i32 30, i32 30> 1858 ret <32 x i8> %shuffle 1859} 1860 1861define <32 x i8> 
@shuffle_v32i8_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15_17_17_19_19_21_21_23_23_25_25_27_27_29_29_31_31(<32 x i8> %a, <32 x i8> %b) { 1862; AVX1-LABEL: shuffle_v32i8_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15_17_17_19_19_21_21_23_23_25_25_27_27_29_29_31_31: 1863; AVX1: # %bb.0: 1864; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1865; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] 1866; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 1867; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 1868; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1869; AVX1-NEXT: retq 1870; 1871; AVX2OR512VL-LABEL: shuffle_v32i8_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15_17_17_19_19_21_21_23_23_25_25_27_27_29_29_31_31: 1872; AVX2OR512VL: # %bb.0: 1873; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15,17,17,19,19,21,21,23,23,25,25,27,27,29,29,31,31] 1874; AVX2OR512VL-NEXT: retq 1875; 1876; XOPAVX1-LABEL: shuffle_v32i8_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15_17_17_19_19_21_21_23_23_25_25_27_27_29_29_31_31: 1877; XOPAVX1: # %bb.0: 1878; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1879; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] 1880; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 1881; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 1882; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1883; XOPAVX1-NEXT: retq 1884; 1885; XOPAVX2-LABEL: shuffle_v32i8_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15_17_17_19_19_21_21_23_23_25_25_27_27_29_29_31_31: 1886; XOPAVX2: # %bb.0: 1887; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15,17,17,19,19,21,21,23,23,25,25,27,27,29,29,31,31] 1888; XOPAVX2-NEXT: retq 1889 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15, i32 17, i32 17, i32 19, i32 19, i32 21, i32 21, i32 23, i32 23, i32 25, i32 25, i32 27, i32 
27, i32 29, i32 29, i32 31, i32 31> 1890 ret <32 x i8> %shuffle 1891} 1892 1893define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00(<32 x i8> %a, <32 x i8> %b) { 1894; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00: 1895; AVX1: # %bb.0: 1896; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0] 1897; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 1898; AVX1-NEXT: retq 1899; 1900; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00: 1901; AVX2: # %bb.0: 1902; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0] 1903; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 1904; AVX2-NEXT: retq 1905; 1906; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00: 1907; AVX512VLBW: # %bb.0: 1908; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0] 1909; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 1910; AVX512VLBW-NEXT: retq 1911; 1912; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00: 1913; AVX512VLVBMI-SLOW: # %bb.0: 1914; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0] 1915; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 1916; AVX512VLVBMI-SLOW-NEXT: retq 1917; 1918; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00: 1919; AVX512VLVBMI-FAST-ALL: # %bb.0: 1920; AVX512VLVBMI-FAST-ALL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0] 1921; AVX512VLVBMI-FAST-ALL-NEXT: # ymm1 = mem[0,1,0,1] 1922; AVX512VLVBMI-FAST-ALL-NEXT: vpermb %ymm0, 
%ymm1, %ymm0 1923; AVX512VLVBMI-FAST-ALL-NEXT: retq 1924; 1925; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00: 1926; AVX512VLVBMI-FAST-PERLANE: # %bb.0: 1927; AVX512VLVBMI-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0] 1928; AVX512VLVBMI-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 1929; AVX512VLVBMI-FAST-PERLANE-NEXT: retq 1930; 1931; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00: 1932; XOPAVX1: # %bb.0: 1933; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0] 1934; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 1935; XOPAVX1-NEXT: retq 1936; 1937; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00: 1938; XOPAVX2: # %bb.0: 1939; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0] 1940; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 1941; XOPAVX2-NEXT: retq 1942 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0> 1943 ret <32 x i8> %shuffle 1944} 1945 1946define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00(<32 x i8> %a, <32 x i8> %b) { 1947; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00: 1948; AVX1: # %bb.0: 1949; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0] 1950; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 1951; AVX1-NEXT: retq 1952; 1953; AVX2-LABEL: 
shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00: 1954; AVX2: # %bb.0: 1955; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0] 1956; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 1957; AVX2-NEXT: retq 1958; 1959; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00: 1960; AVX512VLBW: # %bb.0: 1961; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0] 1962; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 1963; AVX512VLBW-NEXT: retq 1964; 1965; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00: 1966; AVX512VLVBMI-SLOW: # %bb.0: 1967; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0] 1968; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 1969; AVX512VLVBMI-SLOW-NEXT: retq 1970; 1971; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00: 1972; AVX512VLVBMI-FAST-ALL: # %bb.0: 1973; AVX512VLVBMI-FAST-ALL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0] 1974; AVX512VLVBMI-FAST-ALL-NEXT: # ymm1 = mem[0,1,0,1] 1975; AVX512VLVBMI-FAST-ALL-NEXT: vpermb %ymm0, %ymm1, %ymm0 1976; AVX512VLVBMI-FAST-ALL-NEXT: retq 1977; 1978; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00: 1979; AVX512VLVBMI-FAST-PERLANE: # %bb.0: 1980; AVX512VLVBMI-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0] 1981; AVX512VLVBMI-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 1982; AVX512VLVBMI-FAST-PERLANE-NEXT: retq 1983; 1984; XOPAVX1-LABEL: 
shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00: 1985; XOPAVX1: # %bb.0: 1986; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0] 1987; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 1988; XOPAVX1-NEXT: retq 1989; 1990; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00: 1991; XOPAVX2: # %bb.0: 1992; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0] 1993; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 1994; XOPAVX2-NEXT: retq 1995 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0> 1996 ret <32 x i8> %shuffle 1997} 1998 1999define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) { 2000; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00: 2001; AVX1: # %bb.0: 2002; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0] 2003; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 2004; AVX1-NEXT: retq 2005; 2006; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00: 2007; AVX2: # %bb.0: 2008; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0] 2009; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 2010; AVX2-NEXT: retq 2011; 2012; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00: 2013; AVX512VLBW: # %bb.0: 2014; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0] 2015; 
AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 2016; AVX512VLBW-NEXT: retq 2017; 2018; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00: 2019; AVX512VLVBMI-SLOW: # %bb.0: 2020; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0] 2021; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 2022; AVX512VLVBMI-SLOW-NEXT: retq 2023; 2024; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00: 2025; AVX512VLVBMI-FAST-ALL: # %bb.0: 2026; AVX512VLVBMI-FAST-ALL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0] 2027; AVX512VLVBMI-FAST-ALL-NEXT: # ymm1 = mem[0,1,0,1] 2028; AVX512VLVBMI-FAST-ALL-NEXT: vpermb %ymm0, %ymm1, %ymm0 2029; AVX512VLVBMI-FAST-ALL-NEXT: retq 2030; 2031; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00: 2032; AVX512VLVBMI-FAST-PERLANE: # %bb.0: 2033; AVX512VLVBMI-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0] 2034; AVX512VLVBMI-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 2035; AVX512VLVBMI-FAST-PERLANE-NEXT: retq 2036; 2037; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00: 2038; XOPAVX1: # %bb.0: 2039; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0] 2040; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 2041; XOPAVX1-NEXT: retq 2042; 2043; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00: 2044; XOPAVX2: # %bb.0: 2045; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0] 2046; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 2047; XOPAVX2-NEXT: 
retq 2048 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 2049 ret <32 x i8> %shuffle 2050} 2051 2052define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) { 2053; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00: 2054; AVX1: # %bb.0: 2055; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0] 2056; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 2057; AVX1-NEXT: retq 2058; 2059; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00: 2060; AVX2: # %bb.0: 2061; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0] 2062; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 2063; AVX2-NEXT: retq 2064; 2065; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00: 2066; AVX512VLBW: # %bb.0: 2067; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0] 2068; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 2069; AVX512VLBW-NEXT: retq 2070; 2071; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00: 2072; AVX512VLVBMI-SLOW: # %bb.0: 2073; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0] 2074; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 2075; AVX512VLVBMI-SLOW-NEXT: retq 2076; 2077; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00: 2078; 
AVX512VLVBMI-FAST-ALL: # %bb.0: 2079; AVX512VLVBMI-FAST-ALL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0] 2080; AVX512VLVBMI-FAST-ALL-NEXT: # ymm1 = mem[0,1,0,1] 2081; AVX512VLVBMI-FAST-ALL-NEXT: vpermb %ymm0, %ymm1, %ymm0 2082; AVX512VLVBMI-FAST-ALL-NEXT: retq 2083; 2084; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00: 2085; AVX512VLVBMI-FAST-PERLANE: # %bb.0: 2086; AVX512VLVBMI-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0] 2087; AVX512VLVBMI-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 2088; AVX512VLVBMI-FAST-PERLANE-NEXT: retq 2089; 2090; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00: 2091; XOPAVX1: # %bb.0: 2092; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0] 2093; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 2094; XOPAVX1-NEXT: retq 2095; 2096; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00: 2097; XOPAVX2: # %bb.0: 2098; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0] 2099; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 2100; XOPAVX2-NEXT: retq 2101 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 2102 ret <32 x i8> %shuffle 2103} 2104 2105define <32 x i8> @shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) { 2106; AVX1-LABEL: 
shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 2107; AVX1: # %bb.0: 2108; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0] 2109; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 2110; AVX1-NEXT: retq 2111; 2112; AVX2-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 2113; AVX2: # %bb.0: 2114; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0] 2115; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 2116; AVX2-NEXT: retq 2117; 2118; AVX512VLBW-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 2119; AVX512VLBW: # %bb.0: 2120; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0] 2121; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 2122; AVX512VLBW-NEXT: retq 2123; 2124; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 2125; AVX512VLVBMI-SLOW: # %bb.0: 2126; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0] 2127; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 2128; AVX512VLVBMI-SLOW-NEXT: retq 2129; 2130; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 2131; AVX512VLVBMI-FAST-ALL: # %bb.0: 2132; AVX512VLVBMI-FAST-ALL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0] 2133; AVX512VLVBMI-FAST-ALL-NEXT: # ymm1 = mem[0,1,0,1] 2134; AVX512VLVBMI-FAST-ALL-NEXT: vpermb %ymm0, %ymm1, %ymm0 2135; AVX512VLVBMI-FAST-ALL-NEXT: retq 2136; 2137; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 2138; AVX512VLVBMI-FAST-PERLANE: # %bb.0: 
2139; AVX512VLVBMI-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0] 2140; AVX512VLVBMI-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 2141; AVX512VLVBMI-FAST-PERLANE-NEXT: retq 2142; 2143; XOPAVX1-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 2144; XOPAVX1: # %bb.0: 2145; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0] 2146; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 2147; XOPAVX1-NEXT: retq 2148; 2149; XOPAVX2-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 2150; XOPAVX2: # %bb.0: 2151; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0] 2152; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 2153; XOPAVX2-NEXT: retq 2154 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 2155 ret <32 x i8> %shuffle 2156} 2157 ; Single-input shuffle (%b is unused): each 16-byte half of the result is <a[15], a[0] x 15>, so every mask index reads the low 16 bytes of %a. 2158define <32 x i8> @shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) { 2159; AVX1-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 2160; AVX1: # %bb.0: 2161; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0] 2162; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 2163; AVX1-NEXT: retq 2164; 2165; AVX2-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 2166; AVX2: # %bb.0: 2167; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0] 2168; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 2169; AVX2-NEXT: retq 2170; 2171; 
AVX512VLBW-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 2172; AVX512VLBW: # %bb.0: 2173; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0] 2174; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 2175; AVX512VLBW-NEXT: retq 2176; 2177; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 2178; AVX512VLVBMI-SLOW: # %bb.0: 2179; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0] 2180; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 2181; AVX512VLVBMI-SLOW-NEXT: retq 2182; 2183; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 2184; AVX512VLVBMI-FAST-ALL: # %bb.0: 2185; AVX512VLVBMI-FAST-ALL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0] 2186; AVX512VLVBMI-FAST-ALL-NEXT: # ymm1 = mem[0,1,0,1] 2187; AVX512VLVBMI-FAST-ALL-NEXT: vpermb %ymm0, %ymm1, %ymm0 2188; AVX512VLVBMI-FAST-ALL-NEXT: retq 2189; 2190; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 2191; AVX512VLVBMI-FAST-PERLANE: # %bb.0: 2192; AVX512VLVBMI-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0] 2193; AVX512VLVBMI-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 2194; AVX512VLVBMI-FAST-PERLANE-NEXT: retq 2195; 2196; XOPAVX1-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 2197; XOPAVX1: # %bb.0: 2198; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0] 2199; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 2200; XOPAVX1-NEXT: retq 2201; 2202; XOPAVX2-LABEL: 
shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 2203; XOPAVX2: # %bb.0: 2204; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0] 2205; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 2206; XOPAVX2-NEXT: retq 2207 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 2208 ret <32 x i8> %shuffle 2209} 2210 ; Position-preserving byte blend: even result bytes come from %a and odd result bytes from %b (mask index i or 32+i at position i). 2211define <32 x i8> @shuffle_v32i8_00_33_02_35_04_37_06_39_08_41_10_43_12_45_14_47_16_49_18_51_20_53_22_55_24_57_26_59_28_61_30_63(<32 x i8> %a, <32 x i8> %b) { 2212; AVX1-LABEL: shuffle_v32i8_00_33_02_35_04_37_06_39_08_41_10_43_12_45_14_47_16_49_18_51_20_53_22_55_24_57_26_59_28_61_30_63: 2213; AVX1: # %bb.0: 2214; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] 2215; AVX1-NEXT: vandnps %ymm1, %ymm2, %ymm1 2216; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0 2217; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 2218; AVX1-NEXT: retq 2219; 2220; AVX2-LABEL: shuffle_v32i8_00_33_02_35_04_37_06_39_08_41_10_43_12_45_14_47_16_49_18_51_20_53_22_55_24_57_26_59_28_61_30_63: 2221; AVX2: # %bb.0: 2222; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] 2223; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 2224; AVX2-NEXT: retq 2225; 2226; AVX512VL-LABEL: shuffle_v32i8_00_33_02_35_04_37_06_39_08_41_10_43_12_45_14_47_16_49_18_51_20_53_22_55_24_57_26_59_28_61_30_63: 2227; AVX512VL: # %bb.0: 2228; AVX512VL-NEXT: movl $-1431655766, %eax # imm = 0xAAAAAAAA 2229; AVX512VL-NEXT: kmovd %eax, %k1 2230; AVX512VL-NEXT: vmovdqu8 %ymm1, %ymm0 {%k1} 2231; AVX512VL-NEXT: retq 2232; 2233; XOPAVX1-LABEL: 
shuffle_v32i8_00_33_02_35_04_37_06_39_08_41_10_43_12_45_14_47_16_49_18_51_20_53_22_55_24_57_26_59_28_61_30_63: 2234; XOPAVX1: # %bb.0: 2235; XOPAVX1-NEXT: vpcmov {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0, %ymm0 2236; XOPAVX1-NEXT: retq 2237; 2238; XOPAVX2-LABEL: shuffle_v32i8_00_33_02_35_04_37_06_39_08_41_10_43_12_45_14_47_16_49_18_51_20_53_22_55_24_57_26_59_28_61_30_63: 2239; XOPAVX2: # %bb.0: 2240; XOPAVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] 2241; XOPAVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 2242; XOPAVX2-NEXT: retq 2243 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 33, i32 2, i32 35, i32 4, i32 37, i32 6, i32 39, i32 8, i32 41, i32 10, i32 43, i32 12, i32 45, i32 14, i32 47, i32 16, i32 49, i32 18, i32 51, i32 20, i32 53, i32 22, i32 55, i32 24, i32 57, i32 26, i32 59, i32 28, i32 61, i32 30, i32 63> 2244 ret <32 x i8> %shuffle 2245} 2246 ; Position-preserving byte blend: even result bytes come from %b and odd result bytes from %a (mask index 32+i or i at position i). 2247define <32 x i8> @shuffle_v32i8_32_01_34_03_36_05_38_07_40_09_42_11_44_13_46_15_48_17_50_19_52_21_54_23_56_25_58_27_60_29_62_31(<32 x i8> %a, <32 x i8> %b) { 2248; AVX1-LABEL: shuffle_v32i8_32_01_34_03_36_05_38_07_40_09_42_11_44_13_46_15_48_17_50_19_52_21_54_23_56_25_58_27_60_29_62_31: 2249; AVX1: # %bb.0: 2250; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] 2251; AVX1-NEXT: vandnps %ymm0, %ymm2, %ymm0 2252; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1 2253; AVX1-NEXT: vorps %ymm0, %ymm1, %ymm0 2254; AVX1-NEXT: retq 2255; 2256; AVX2-LABEL: shuffle_v32i8_32_01_34_03_36_05_38_07_40_09_42_11_44_13_46_15_48_17_50_19_52_21_54_23_56_25_58_27_60_29_62_31: 2257; AVX2: # %bb.0: 2258; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] 2259; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 2260; AVX2-NEXT: retq 2261; 2262; 
AVX512VL-LABEL: shuffle_v32i8_32_01_34_03_36_05_38_07_40_09_42_11_44_13_46_15_48_17_50_19_52_21_54_23_56_25_58_27_60_29_62_31: 2263; AVX512VL: # %bb.0: 2264; AVX512VL-NEXT: movl $-1431655766, %eax # imm = 0xAAAAAAAA 2265; AVX512VL-NEXT: kmovd %eax, %k1 2266; AVX512VL-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1} 2267; AVX512VL-NEXT: retq 2268; 2269; XOPAVX1-LABEL: shuffle_v32i8_32_01_34_03_36_05_38_07_40_09_42_11_44_13_46_15_48_17_50_19_52_21_54_23_56_25_58_27_60_29_62_31: 2270; XOPAVX1: # %bb.0: 2271; XOPAVX1-NEXT: vpcmov {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1, %ymm0 2272; XOPAVX1-NEXT: retq 2273; 2274; XOPAVX2-LABEL: shuffle_v32i8_32_01_34_03_36_05_38_07_40_09_42_11_44_13_46_15_48_17_50_19_52_21_54_23_56_25_58_27_60_29_62_31: 2275; XOPAVX2: # %bb.0: 2276; XOPAVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] 2277; XOPAVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 2278; XOPAVX2-NEXT: retq 2279 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 32, i32 1, i32 34, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 29, i32 62, i32 31> 2280 ret <32 x i8> %shuffle 2281} 2282 2283; PR27780 - https://bugs.llvm.org/show_bug.cgi?id=27780 2284 ; Position-preserving blend whose first shuffle operand %x is loaded from %px: in every group of 8 result bytes, bytes 2, 4, 5 and 6 come from %x and bytes 0, 1, 3 and 7 from %y. 2285define <32 x i8> @load_fold_pblendvb(ptr %px, <32 x i8> %y) { 2286; AVX1-LABEL: load_fold_pblendvb: 2287; AVX1: # %bb.0: 2288; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm1 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255] 2289; AVX1-NEXT: vandnps (%rdi), %ymm1, %ymm2 2290; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0 2291; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0 2292; AVX1-NEXT: retq 2293; 2294; AVX2-LABEL: load_fold_pblendvb: 2295; AVX2: # %bb.0: 2296; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = 
[0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0] 2297; AVX2-NEXT: vpblendvb %ymm1, (%rdi), %ymm0, %ymm0 2298; AVX2-NEXT: retq 2299; 2300; AVX512VL-LABEL: load_fold_pblendvb: 2301; AVX512VL: # %bb.0: 2302; AVX512VL-NEXT: movl $1953789044, %eax # imm = 0x74747474 2303; AVX512VL-NEXT: kmovd %eax, %k1 2304; AVX512VL-NEXT: vmovdqu8 (%rdi), %ymm0 {%k1} 2305; AVX512VL-NEXT: retq 2306; 2307; XOPAVX1-LABEL: load_fold_pblendvb: 2308; XOPAVX1: # %bb.0: 2309; XOPAVX1-NEXT: vbroadcastsd {{.*#+}} ymm1 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255] 2310; XOPAVX1-NEXT: vpcmov %ymm1, (%rdi), %ymm0, %ymm0 2311; XOPAVX1-NEXT: retq 2312; 2313; XOPAVX2-LABEL: load_fold_pblendvb: 2314; XOPAVX2: # %bb.0: 2315; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0] 2316; XOPAVX2-NEXT: vpblendvb %ymm1, (%rdi), %ymm0, %ymm0 2317; XOPAVX2-NEXT: retq 2318 %x = load <32 x i8>, ptr %px, align 32 2319 %select = shufflevector <32 x i8> %x, <32 x i8> %y, <32 x i32> <i32 32, i32 33, i32 2, i32 35, i32 4, i32 5, i32 6, i32 39, i32 40, i32 41, i32 10, i32 43, i32 12, i32 13, i32 14, i32 47, i32 48, i32 49, i32 18, i32 51, i32 20, i32 21, i32 22, i32 55, i32 56, i32 57, i32 26, i32 59, i32 28, i32 29, i32 30, i32 63> 2320 ret <32 x i8> %select 2321} 2322 ; Same mask as load_fold_pblendvb but with the shuffle operands swapped (%y first, loaded %x second): in every group of 8 result bytes, bytes 2, 4, 5 and 6 come from %y and bytes 0, 1, 3 and 7 from the loaded %x. 2323define <32 x i8> @load_fold_pblendvb_commute(ptr %px, <32 x i8> %y) { 2324; AVX1-LABEL: load_fold_pblendvb_commute: 2325; AVX1: # %bb.0: 2326; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm1 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255] 2327; AVX1-NEXT: vandnps %ymm0, %ymm1, %ymm0 2328; AVX1-NEXT: vandps (%rdi), %ymm1, %ymm1 2329; AVX1-NEXT: vorps %ymm0, %ymm1, %ymm0 2330; AVX1-NEXT: retq 2331; 2332; AVX2-LABEL: load_fold_pblendvb_commute: 2333; AVX2: # %bb.0: 2334; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = 
[255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255] 2335; AVX2-NEXT: vpblendvb %ymm1, (%rdi), %ymm0, %ymm0 2336; AVX2-NEXT: retq 2337; 2338; AVX512VL-LABEL: load_fold_pblendvb_commute: 2339; AVX512VL: # %bb.0: 2340; AVX512VL-NEXT: vmovdqa (%rdi), %ymm1 2341; AVX512VL-NEXT: movl $1953789044, %eax # imm = 0x74747474 2342; AVX512VL-NEXT: kmovd %eax, %k1 2343; AVX512VL-NEXT: vmovdqu8 %ymm0, %ymm1 {%k1} 2344; AVX512VL-NEXT: vmovdqa %ymm1, %ymm0 2345; AVX512VL-NEXT: retq 2346; 2347; XOPAVX1-LABEL: load_fold_pblendvb_commute: 2348; XOPAVX1: # %bb.0: 2349; XOPAVX1-NEXT: vmovdqa (%rdi), %ymm1 2350; XOPAVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255] 2351; XOPAVX1-NEXT: vpcmov %ymm2, %ymm0, %ymm1, %ymm0 2352; XOPAVX1-NEXT: retq 2353; 2354; XOPAVX2-LABEL: load_fold_pblendvb_commute: 2355; XOPAVX2: # %bb.0: 2356; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255] 2357; XOPAVX2-NEXT: vpblendvb %ymm1, (%rdi), %ymm0, %ymm0 2358; XOPAVX2-NEXT: retq 2359 %x = load <32 x i8>, ptr %px, align 32 2360 %select = shufflevector <32 x i8> %y, <32 x i8> %x, <32 x i32> <i32 32, i32 33, i32 2, i32 35, i32 4, i32 5, i32 6, i32 39, i32 40, i32 41, i32 10, i32 43, i32 12, i32 13, i32 14, i32 47, i32 48, i32 49, i32 18, i32 51, i32 20, i32 21, i32 22, i32 55, i32 56, i32 57, i32 26, i32 59, i32 28, i32 29, i32 30, i32 63> 2361 ret <32 x i8> %select 2362} 2363 ; Blend of %a with zeroinitializer: even result bytes are zero and odd result bytes keep %a's byte at the same position. 2364define <32 x i8> @shuffle_v32i8_zz_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31(<32 x i8> %a) { 2365; AVX1-LABEL: shuffle_v32i8_zz_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31: 2366; AVX1: # %bb.0: 2367; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 2368; AVX1-NEXT: retq 2369; 2370; AVX2-LABEL: 
shuffle_v32i8_zz_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31: 2371; AVX2: # %bb.0: 2372; AVX2-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 2373; AVX2-NEXT: retq 2374; 2375; AVX512VL-LABEL: shuffle_v32i8_zz_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31: 2376; AVX512VL: # %bb.0: 2377; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0 2378; AVX512VL-NEXT: retq 2379; 2380; XOP-LABEL: shuffle_v32i8_zz_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31: 2381; XOP: # %bb.0: 2382; XOP-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 2383; XOP-NEXT: retq 2384 %shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 1, i32 34, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 29, i32 62, i32 31> 2385 ret <32 x i8> %shuffle 2386} 2387 ; Shuffle of %a with zeroinitializer: result bytes 0-4 are <a[1], 0, a[2], 0, a[4]>, byte 5 is poison, and bytes 6-31 are %a's bytes at the same positions. NOTE(review): the name spells position 16 as "u6" although the mask index there is 16 - possibly a typo in the name; confirm before renaming. 2388define <32 x i8> @shuffle_v32i8_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15_u6_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31(<32 x i8> %a) { 2389; AVX1-LABEL: shuffle_v32i8_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15_u6_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31: 2390; AVX1: # %bb.0: 2391; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[1],zero,xmm0[2],zero,xmm0[4,u,6,7,8,9,10,11,12,13,14,15] 2392; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] 2393; AVX1-NEXT: retq 2394; 2395; AVX2OR512VL-LABEL: shuffle_v32i8_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15_u6_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31: 2396; AVX2OR512VL: # %bb.0: 2397; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1],zero,ymm0[2],zero,ymm0[4,u,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31] 2398; AVX2OR512VL-NEXT: retq 2399; 2400; XOPAVX1-LABEL: 
shuffle_v32i8_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15_u6_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31: 2401; XOPAVX1: # %bb.0: 2402; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[1],zero,xmm0[2],zero,xmm0[4,u,6,7,8,9,10,11,12,13,14,15] 2403; XOPAVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] 2404; XOPAVX1-NEXT: retq 2405; 2406; XOPAVX2-LABEL: shuffle_v32i8_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15_u6_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31: 2407; XOPAVX2: # %bb.0: 2408; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1],zero,ymm0[2],zero,ymm0[4,u,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31] 2409; XOPAVX2-NEXT: retq 2410 %shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 1, i32 32, i32 2, i32 32, i32 4, i32 poison, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 2411 ret <32 x i8> %shuffle 2412} 2413 ; The result is the byte pair <a[0], b[0]> repeated 16 times. 2414define <32 x i8> @shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32(<32 x i8> %a, <32 x i8> %b) { 2415; AVX1-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32: 2416; AVX1: # %bb.0: 2417; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2418; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 2419; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 2420; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 2421; AVX1-NEXT: retq 2422; 2423; AVX2OR512VL-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32: 2424; AVX2OR512VL: # %bb.0: 2425; AVX2OR512VL-NEXT: vpunpcklbw {{.*#+}} xmm0 = 
xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2426; AVX2OR512VL-NEXT: vpbroadcastw %xmm0, %ymm0 2427; AVX2OR512VL-NEXT: retq 2428; 2429; XOPAVX1-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32: 2430; XOPAVX1: # %bb.0: 2431; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[0],xmm1[0],xmm0[0],xmm1[0],xmm0[0],xmm1[0],xmm0[0],xmm1[0],xmm0[0],xmm1[0],xmm0[0],xmm1[0],xmm0[0],xmm1[0] 2432; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 2433; XOPAVX1-NEXT: retq 2434; 2435; XOPAVX2-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32: 2436; XOPAVX2: # %bb.0: 2437; XOPAVX2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2438; XOPAVX2-NEXT: vpbroadcastw %xmm0, %ymm0 2439; XOPAVX2-NEXT: retq 2440 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32> 2441 ret <32 x i8> %shuffle 2442} 2443 ; The low 16 result bytes are the pair <a[0], b[0]> repeated 8 times; the high 16 result bytes are the pair <a[16], b[16]> repeated 8 times. 2444define <32 x i8> @shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_16_48_16_48_16_48_16_48_16_48_16_48_16_48_16_48(<32 x i8> %a, <32 x i8> %b) { 2445; AVX1-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_16_48_16_48_16_48_16_48_16_48_16_48_16_48_16_48: 2446; AVX1: # %bb.0: 2447; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2448; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,0,0,0,4,5,6,7] 2449; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 2450; AVX1-NEXT: 
vextractf128 $1, %ymm0, %xmm0 2451; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2452; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 2453; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 2454; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4] 2455; AVX1-NEXT: retq 2456; 2457; AVX2-SLOW-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_16_48_16_48_16_48_16_48_16_48_16_48_16_48_16_48: 2458; AVX2-SLOW: # %bb.0: 2459; AVX2-SLOW-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] 2460; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15] 2461; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4] 2462; AVX2-SLOW-NEXT: retq 2463; 2464; AVX2-FAST-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_16_48_16_48_16_48_16_48_16_48_16_48_16_48_16_48: 2465; AVX2-FAST: # %bb.0: 2466; AVX2-FAST-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] 2467; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] 2468; AVX2-FAST-NEXT: retq 2469; 2470; AVX512VLBW-SLOW-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_16_48_16_48_16_48_16_48_16_48_16_48_16_48_16_48: 2471; AVX512VLBW-SLOW: # %bb.0: 2472; AVX512VLBW-SLOW-NEXT: vpunpcklbw {{.*#+}} ymm0 = 
ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] 2473; AVX512VLBW-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15] 2474; AVX512VLBW-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4] 2475; AVX512VLBW-SLOW-NEXT: retq 2476; 2477; AVX512VLBW-FAST-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_16_48_16_48_16_48_16_48_16_48_16_48_16_48_16_48: 2478; AVX512VLBW-FAST: # %bb.0: 2479; AVX512VLBW-FAST-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] 2480; AVX512VLBW-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17] 2481; AVX512VLBW-FAST-NEXT: retq 2482; 2483; AVX512VLVBMI-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_16_48_16_48_16_48_16_48_16_48_16_48_16_48_16_48: 2484; AVX512VLVBMI: # %bb.0: 2485; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} ymm2 = [0,32,0,32,0,32,0,32,0,32,0,32,0,32,0,32,16,48,16,48,16,48,16,48,16,48,16,48,16,48,16,48] 2486; AVX512VLVBMI-NEXT: vpermt2b %ymm1, %ymm2, %ymm0 2487; AVX512VLVBMI-NEXT: retq 2488; 2489; XOPAVX1-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_16_48_16_48_16_48_16_48_16_48_16_48_16_48_16_48: 2490; XOPAVX1: # %bb.0: 2491; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 2492; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 2493; XOPAVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [0,16,0,16,0,16,0,16,0,16,0,16,0,16,0,16] 2494; XOPAVX1-NEXT: vpperm %xmm4, %xmm2, %xmm3, %xmm2 2495; XOPAVX1-NEXT: vpperm %xmm4, %xmm1, %xmm0, 
%xmm0 2496; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2497; XOPAVX1-NEXT: retq 2498; 2499; XOPAVX2-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_16_48_16_48_16_48_16_48_16_48_16_48_16_48_16_48: 2500; XOPAVX2: # %bb.0: 2501; XOPAVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] 2502; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15] 2503; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4] 2504; XOPAVX2-NEXT: retq 2505 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 16, i32 48, i32 16, i32 48, i32 16, i32 48, i32 16, i32 48, i32 16, i32 48, i32 16, i32 48, i32 16, i32 48, i32 16, i32 48> 2506 ret <32 x i8> %shuffle 2507} 2508 ; In each 16-byte half, the low 8 result bytes are copies of the first byte of %b's half (b[0] / b[16]) and the high 8 result bytes are %a's bytes at the same positions (a[8..15] / a[24..31]). 2509define <32 x i8> @shuffle_v32i8_32_32_32_32_32_32_32_32_08_09_10_11_12_13_14_15_48_48_48_48_48_48_48_48_24_25_26_27_28_29_30_31(<32 x i8> %a, <32 x i8> %b) { 2510; AVX1-LABEL: shuffle_v32i8_32_32_32_32_32_32_32_32_08_09_10_11_12_13_14_15_48_48_48_48_48_48_48_48_24_25_26_27_28_29_30_31: 2511; AVX1: # %bb.0: 2512; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 2513; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 2514; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2515; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[0,0,0,0,4,5,6,7] 2516; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7] 2517; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2518; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] 2519; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] 2520; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2521; 
AVX1-NEXT: retq 2522; 2523; AVX2-LABEL: shuffle_v32i8_32_32_32_32_32_32_32_32_08_09_10_11_12_13_14_15_48_48_48_48_48_48_48_48_24_25_26_27_28_29_30_31: 2524; AVX2: # %bb.0: 2525; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 2526; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1 2527; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7] 2528; AVX2-NEXT: retq 2529; 2530; AVX512VLBW-LABEL: shuffle_v32i8_32_32_32_32_32_32_32_32_08_09_10_11_12_13_14_15_48_48_48_48_48_48_48_48_24_25_26_27_28_29_30_31: 2531; AVX512VLBW: # %bb.0: 2532; AVX512VLBW-NEXT: vpxor %xmm2, %xmm2, %xmm2 2533; AVX512VLBW-NEXT: vpshufb %ymm2, %ymm1, %ymm1 2534; AVX512VLBW-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7] 2535; AVX512VLBW-NEXT: retq 2536; 2537; AVX512VLVBMI-LABEL: shuffle_v32i8_32_32_32_32_32_32_32_32_08_09_10_11_12_13_14_15_48_48_48_48_48_48_48_48_24_25_26_27_28_29_30_31: 2538; AVX512VLVBMI: # %bb.0: 2539; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,40,41,42,43,44,45,46,47,16,16,16,16,16,16,16,16,56,57,58,59,60,61,62,63] 2540; AVX512VLVBMI-NEXT: vpermi2b %ymm0, %ymm1, %ymm2 2541; AVX512VLVBMI-NEXT: vmovdqa %ymm2, %ymm0 2542; AVX512VLVBMI-NEXT: retq 2543; 2544; XOPAVX1-LABEL: shuffle_v32i8_32_32_32_32_32_32_32_32_08_09_10_11_12_13_14_15_48_48_48_48_48_48_48_48_24_25_26_27_28_29_30_31: 2545; XOPAVX1: # %bb.0: 2546; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 2547; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 2548; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,0,0,0,0,0,0,0,24,25,26,27,28,29,30,31] 2549; XOPAVX1-NEXT: vpperm %xmm4, %xmm2, %xmm3, %xmm2 2550; XOPAVX1-NEXT: vpperm %xmm4, %xmm0, %xmm1, %xmm0 2551; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2552; XOPAVX1-NEXT: retq 2553; 2554; XOPAVX2-LABEL: shuffle_v32i8_32_32_32_32_32_32_32_32_08_09_10_11_12_13_14_15_48_48_48_48_48_48_48_48_24_25_26_27_28_29_30_31: 2555; XOPAVX2: # %bb.0: 2556; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 2557; XOPAVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1 2558; 
XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7] 2559; XOPAVX2-NEXT: retq 2560 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 2561 ret <32 x i8> %shuffle 2562} 2563 ; In each 16-byte half, the low 8 result bytes are the low 8 bytes of %b's half in reverse order and the high 8 result bytes are the high 8 bytes of %a's half in reverse order. 2564define <32 x i8> @shuffle_v32i8_39_38_37_36_35_34_33_32_15_14_13_12_11_10_09_08_55_54_53_52_51_50_49_48_31_30_29_28_27_26_25_24(<32 x i8> %a, <32 x i8> %b) { 2565; AVX1-LABEL: shuffle_v32i8_39_38_37_36_35_34_33_32_15_14_13_12_11_10_09_08_55_54_53_52_51_50_49_48_31_30_29_28_27_26_25_24: 2566; AVX1: # %bb.0: 2567; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 2568; AVX1-NEXT: vmovq {{.*#+}} xmm3 = [15,14,13,12,11,10,9,8,0,0,0,0,0,0,0,0] 2569; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2 2570; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4 2571; AVX1-NEXT: vmovq {{.*#+}} xmm5 = [7,6,5,4,3,2,1,0,0,0,0,0,0,0,0,0] 2572; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm4 2573; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm4[0],xmm2[0] 2574; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0 2575; AVX1-NEXT: vpshufb %xmm5, %xmm1, %xmm1 2576; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 2577; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2578; AVX1-NEXT: retq 2579; 2580; AVX2-LABEL: shuffle_v32i8_39_38_37_36_35_34_33_32_15_14_13_12_11_10_09_08_55_54_53_52_51_50_49_48_31_30_29_28_27_26_25_24: 2581; AVX2: # %bb.0: 2582; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7] 2583; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,23,22,21,20,19,18,17,16,31,30,29,28,27,26,25,24] 2584; AVX2-NEXT: retq 2585; 2586; AVX512VLBW-LABEL: shuffle_v32i8_39_38_37_36_35_34_33_32_15_14_13_12_11_10_09_08_55_54_53_52_51_50_49_48_31_30_29_28_27_26_25_24: 2587; AVX512VLBW: # %bb.0: 2588; AVX512VLBW-NEXT: vpblendd 
{{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7] 2589; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,23,22,21,20,19,18,17,16,31,30,29,28,27,26,25,24] 2590; AVX512VLBW-NEXT: retq 2591; 2592; AVX512VLVBMI-LABEL: shuffle_v32i8_39_38_37_36_35_34_33_32_15_14_13_12_11_10_09_08_55_54_53_52_51_50_49_48_31_30_29_28_27_26_25_24: 2593; AVX512VLVBMI: # %bb.0: 2594; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} ymm2 = [7,6,5,4,3,2,1,0,47,46,45,44,43,42,41,40,23,22,21,20,19,18,17,16,63,62,61,60,59,58,57,56] 2595; AVX512VLVBMI-NEXT: vpermi2b %ymm0, %ymm1, %ymm2 2596; AVX512VLVBMI-NEXT: vmovdqa %ymm2, %ymm0 2597; AVX512VLVBMI-NEXT: retq 2598; 2599; XOPAVX1-LABEL: shuffle_v32i8_39_38_37_36_35_34_33_32_15_14_13_12_11_10_09_08_55_54_53_52_51_50_49_48_31_30_29_28_27_26_25_24: 2600; XOPAVX1: # %bb.0: 2601; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 2602; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 2603; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24] 2604; XOPAVX1-NEXT: vpperm %xmm4, %xmm2, %xmm3, %xmm2 2605; XOPAVX1-NEXT: vpperm %xmm4, %xmm0, %xmm1, %xmm0 2606; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2607; XOPAVX1-NEXT: retq 2608; 2609; XOPAVX2-LABEL: shuffle_v32i8_39_38_37_36_35_34_33_32_15_14_13_12_11_10_09_08_55_54_53_52_51_50_49_48_31_30_29_28_27_26_25_24: 2610; XOPAVX2: # %bb.0: 2611; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7] 2612; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,23,22,21,20,19,18,17,16,31,30,29,28,27,26,25,24] 2613; XOPAVX2-NEXT: retq 2614 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24> 2615 ret <32 x i8> %shuffle 2616} 2617 ; In each 16-byte half, the low 8 result bytes are the low 8 bytes of %b's half in reverse order and the high 8 result bytes are the low 8 bytes of %a's half in reverse order. 2618define <32 x i8> 
@shuffle_v32i8_39_38_37_36_35_34_33_32_07_06_05_04_03_02_01_00_55_54_53_52_51_50_49_48_23_22_21_20_19_18_17_16(<32 x i8> %a, <32 x i8> %b) { 2619; AVX1-LABEL: shuffle_v32i8_39_38_37_36_35_34_33_32_07_06_05_04_03_02_01_00_55_54_53_52_51_50_49_48_23_22_21_20_19_18_17_16: 2620; AVX1: # %bb.0: 2621; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 2622; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 2623; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 2624; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [14,12,10,8,6,4,2,0,15,13,11,9,7,5,3,1] 2625; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2 2626; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2627; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0 2628; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2629; AVX1-NEXT: retq 2630; 2631; AVX2-LABEL: shuffle_v32i8_39_38_37_36_35_34_33_32_07_06_05_04_03_02_01_00_55_54_53_52_51_50_49_48_23_22_21_20_19_18_17_16: 2632; AVX2: # %bb.0: 2633; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0] 2634; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0 2635; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1 2636; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7] 2637; AVX2-NEXT: retq 2638; 2639; AVX512VLBW-LABEL: shuffle_v32i8_39_38_37_36_35_34_33_32_07_06_05_04_03_02_01_00_55_54_53_52_51_50_49_48_23_22_21_20_19_18_17_16: 2640; AVX512VLBW: # %bb.0: 2641; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} ymm2 = [7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0] 2642; AVX512VLBW-NEXT: vpshufb %ymm2, %ymm0, %ymm0 2643; AVX512VLBW-NEXT: vpshufb %ymm2, %ymm1, %ymm1 2644; AVX512VLBW-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7] 2645; AVX512VLBW-NEXT: retq 2646; 2647; AVX512VLVBMI-LABEL: 
shuffle_v32i8_39_38_37_36_35_34_33_32_07_06_05_04_03_02_01_00_55_54_53_52_51_50_49_48_23_22_21_20_19_18_17_16: 2648; AVX512VLVBMI: # %bb.0: 2649; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} ymm2 = [7,6,5,4,3,2,1,0,39,38,37,36,35,34,33,32,23,22,21,20,19,18,17,16,55,54,53,52,51,50,49,48] 2650; AVX512VLVBMI-NEXT: vpermi2b %ymm0, %ymm1, %ymm2 2651; AVX512VLVBMI-NEXT: vmovdqa %ymm2, %ymm0 2652; AVX512VLVBMI-NEXT: retq 2653; 2654; XOPAVX1-LABEL: shuffle_v32i8_39_38_37_36_35_34_33_32_07_06_05_04_03_02_01_00_55_54_53_52_51_50_49_48_23_22_21_20_19_18_17_16: 2655; XOPAVX1: # %bb.0: 2656; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 2657; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 2658; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [7,6,5,4,3,2,1,0,23,22,21,20,19,18,17,16] 2659; XOPAVX1-NEXT: vpperm %xmm4, %xmm2, %xmm3, %xmm2 2660; XOPAVX1-NEXT: vpperm %xmm4, %xmm0, %xmm1, %xmm0 2661; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2662; XOPAVX1-NEXT: retq 2663; 2664; XOPAVX2-LABEL: shuffle_v32i8_39_38_37_36_35_34_33_32_07_06_05_04_03_02_01_00_55_54_53_52_51_50_49_48_23_22_21_20_19_18_17_16: 2665; XOPAVX2: # %bb.0: 2666; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0] 2667; XOPAVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0 2668; XOPAVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1 2669; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7] 2670; XOPAVX2-NEXT: retq 2671 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16> 2672 ret <32 x i8> %shuffle 2673} 2674 2675define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_17_16(<32 x i8> %a, <32 x i8> %b) { 2676; AVX1-LABEL: 
shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_17_16: 2677; AVX1: # %bb.0: 2678; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2679; AVX1-NEXT: vpmovsxbw {{.*#+}} xmm2 = [0,0,0,0,0,0,0,1] 2680; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 2681; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 2682; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2683; AVX1-NEXT: retq 2684; 2685; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_17_16: 2686; AVX2OR512VL: # %bb.0: 2687; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,17,16] 2688; AVX2OR512VL-NEXT: retq 2689; 2690; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_17_16: 2691; XOPAVX1: # %bb.0: 2692; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2693; XOPAVX1-NEXT: vpmovsxbw {{.*#+}} xmm2 = [0,0,0,0,0,0,0,1] 2694; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 2695; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 2696; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2697; XOPAVX1-NEXT: retq 2698; 2699; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_17_16: 2700; XOPAVX2: # %bb.0: 2701; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,17,16] 2702; XOPAVX2-NEXT: retq 2703 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 16> 2704 ret <32 x i8> %shuffle 2705} 2706 2707define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_18_16_16(<32 x i8> 
%a, <32 x i8> %b) { 2708; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_18_16_16: 2709; AVX1: # %bb.0: 2710; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2711; AVX1-NEXT: vpmovsxwd {{.*#+}} xmm2 = [0,0,0,512] 2712; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 2713; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 2714; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2715; AVX1-NEXT: retq 2716; 2717; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_18_16_16: 2718; AVX2OR512VL: # %bb.0: 2719; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,18,16,16] 2720; AVX2OR512VL-NEXT: retq 2721; 2722; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_18_16_16: 2723; XOPAVX1: # %bb.0: 2724; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2725; XOPAVX1-NEXT: vpmovsxwd {{.*#+}} xmm2 = [0,0,0,512] 2726; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 2727; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 2728; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2729; XOPAVX1-NEXT: retq 2730; 2731; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_18_16_16: 2732; XOPAVX2: # %bb.0: 2733; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,18,16,16] 2734; XOPAVX2-NEXT: retq 2735 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 18, i32 16, i32 16> 2736 ret <32 x i8> %shuffle 2737} 2738 2739define <32 x i8> 
@shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_23_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) { 2740; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_23_16_16_16_16_16_16_16: 2741; AVX1: # %bb.0: 2742; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2743; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm2 = [0,7] 2744; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 2745; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 2746; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2747; AVX1-NEXT: retq 2748; 2749; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_23_16_16_16_16_16_16_16: 2750; AVX2OR512VL: # %bb.0: 2751; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,23,16,16,16,16,16,16,16] 2752; AVX2OR512VL-NEXT: retq 2753; 2754; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_23_16_16_16_16_16_16_16: 2755; XOPAVX1: # %bb.0: 2756; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2757; XOPAVX1-NEXT: vpmovsxbq {{.*#+}} xmm2 = [0,7] 2758; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 2759; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 2760; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2761; XOPAVX1-NEXT: retq 2762; 2763; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_23_16_16_16_16_16_16_16: 2764; XOPAVX2: # %bb.0: 2765; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,23,16,16,16,16,16,16,16] 2766; XOPAVX2-NEXT: retq 2767 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 23, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16> 2768 ret <32 x i8> %shuffle 2769} 2770 
2771define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_24_16_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) { 2772; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_24_16_16_16_16_16_16_16_16: 2773; AVX1: # %bb.0: 2774; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2775; AVX1-NEXT: vmovq {{.*#+}} xmm2 = [0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0] 2776; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 2777; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 2778; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2779; AVX1-NEXT: retq 2780; 2781; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_24_16_16_16_16_16_16_16_16: 2782; AVX2OR512VL: # %bb.0: 2783; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,24,16,16,16,16,16,16,16,16] 2784; AVX2OR512VL-NEXT: retq 2785; 2786; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_24_16_16_16_16_16_16_16_16: 2787; XOPAVX1: # %bb.0: 2788; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2789; XOPAVX1-NEXT: vmovq {{.*#+}} xmm2 = [0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0] 2790; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 2791; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 2792; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2793; XOPAVX1-NEXT: retq 2794; 2795; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_24_16_16_16_16_16_16_16_16: 2796; XOPAVX2: # %bb.0: 2797; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,24,16,16,16,16,16,16,16,16] 2798; XOPAVX2-NEXT: retq 2799 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 24, i32 16, i32 16, i32 16, i32 16, i32 16, 
i32 16, i32 16, i32 16> 2800 ret <32 x i8> %shuffle 2801} 2802 2803define <32 x i8> @shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_30_16_16_16_16_16_16_16_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) { 2804; AVX1-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_30_16_16_16_16_16_16_16_16_16_16_16_16_16_16: 2805; AVX1: # %bb.0: 2806; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2807; AVX1-NEXT: vmovd {{.*#+}} xmm2 = [0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0] 2808; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 2809; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 2810; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2811; AVX1-NEXT: retq 2812; 2813; AVX2OR512VL-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_30_16_16_16_16_16_16_16_16_16_16_16_16_16_16: 2814; AVX2OR512VL: # %bb.0: 2815; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,30,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 2816; AVX2OR512VL-NEXT: retq 2817; 2818; XOPAVX1-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_30_16_16_16_16_16_16_16_16_16_16_16_16_16_16: 2819; XOPAVX1: # %bb.0: 2820; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2821; XOPAVX1-NEXT: vmovd {{.*#+}} xmm2 = [0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0] 2822; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 2823; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 2824; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2825; XOPAVX1-NEXT: retq 2826; 2827; XOPAVX2-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_30_16_16_16_16_16_16_16_16_16_16_16_16_16_16: 2828; XOPAVX2: # %bb.0: 2829; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,30,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 2830; XOPAVX2-NEXT: retq 2831 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 30, i32 16, i32 16, i32 
16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16> 2832 ret <32 x i8> %shuffle 2833} 2834 2835define <32 x i8> @shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_31_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) { 2836; AVX1-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_31_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16: 2837; AVX1: # %bb.0: 2838; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2839; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm2 = [15,0] 2840; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 2841; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 2842; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2843; AVX1-NEXT: retq 2844; 2845; AVX2OR512VL-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_31_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16: 2846; AVX2OR512VL: # %bb.0: 2847; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,31,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 2848; AVX2OR512VL-NEXT: retq 2849; 2850; XOPAVX1-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_31_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16: 2851; XOPAVX1: # %bb.0: 2852; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2853; XOPAVX1-NEXT: vpmovsxbq {{.*#+}} xmm2 = [15,0] 2854; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 2855; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 2856; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2857; XOPAVX1-NEXT: retq 2858; 2859; XOPAVX2-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_31_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16: 2860; XOPAVX2: # %bb.0: 2861; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,31,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 2862; XOPAVX2-NEXT: retq 2863 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 31, i32 16, 
i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16> 2864 ret <32 x i8> %shuffle 2865} 2866 2867define <32 x i8> @shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55(<32 x i8> %a, <32 x i8> %b) { 2868; AVX1-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55: 2869; AVX1: # %bb.0: 2870; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 2871; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 2872; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 2873; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2874; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2875; AVX1-NEXT: retq 2876; 2877; AVX2OR512VL-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55: 2878; AVX2OR512VL: # %bb.0: 2879; AVX2OR512VL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] 2880; AVX2OR512VL-NEXT: retq 2881; 2882; XOPAVX1-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55: 2883; XOPAVX1: # %bb.0: 2884; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 2885; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 2886; XOPAVX1-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 2887; XOPAVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = 
xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2888; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2889; XOPAVX1-NEXT: retq 2890; 2891; XOPAVX2-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55: 2892; XOPAVX2: # %bb.0: 2893; XOPAVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] 2894; XOPAVX2-NEXT: retq 2895 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55> 2896 ret <32 x i8> %shuffle 2897} 2898 2899define <32 x i8> @shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63(<32 x i8> %a, <32 x i8> %b) { 2900; AVX1-LABEL: shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63: 2901; AVX1: # %bb.0: 2902; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 2903; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 2904; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15] 2905; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] 2906; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2907; AVX1-NEXT: retq 2908; 2909; AVX2OR512VL-LABEL: 
shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63: 2910; AVX2OR512VL: # %bb.0: 2911; AVX2OR512VL-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] 2912; AVX2OR512VL-NEXT: retq 2913; 2914; XOPAVX1-LABEL: shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63: 2915; XOPAVX1: # %bb.0: 2916; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 2917; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 2918; XOPAVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15] 2919; XOPAVX1-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] 2920; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2921; XOPAVX1-NEXT: retq 2922; 2923; XOPAVX2-LABEL: shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63: 2924; XOPAVX2: # %bb.0: 2925; XOPAVX2-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] 2926; XOPAVX2-NEXT: retq 2927 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, 
i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63> 2928 ret <32 x i8> %shuffle 2929} 2930 2931define <32 x i8> @shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63(<32 x i8> %a, <32 x i8> %b) { 2932; AVX1-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63: 2933; AVX1: # %bb.0: 2934; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 2935; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 2936; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15] 2937; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2938; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2939; AVX1-NEXT: retq 2940; 2941; AVX2-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63: 2942; AVX2: # %bb.0: 2943; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,3,3] 2944; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,3,3] 2945; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] 2946; AVX2-NEXT: retq 2947; 2948; AVX512VLBW-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63: 2949; AVX512VLBW: # %bb.0: 2950; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,3,3] 2951; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,3,3] 2952; AVX512VLBW-NEXT: vpunpcklbw {{.*#+}} ymm0 = 
ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] 2953; AVX512VLBW-NEXT: retq 2954; 2955; AVX512VLVBMI-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63: 2956; AVX512VLVBMI: # %bb.0: 2957; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} ymm2 = [0,32,1,33,2,34,3,35,4,36,5,37,6,38,7,39,24,56,25,57,26,58,27,59,28,60,29,61,30,62,31,63] 2958; AVX512VLVBMI-NEXT: vpermt2b %ymm1, %ymm2, %ymm0 2959; AVX512VLVBMI-NEXT: retq 2960; 2961; XOPAVX1-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63: 2962; XOPAVX1: # %bb.0: 2963; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 2964; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 2965; XOPAVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15] 2966; XOPAVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2967; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2968; XOPAVX1-NEXT: retq 2969; 2970; XOPAVX2-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63: 2971; XOPAVX2: # %bb.0: 2972; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,3,3] 2973; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,3,3] 2974; XOPAVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] 
2975; XOPAVX2-NEXT: retq 2976 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63> 2977 ret <32 x i8> %shuffle 2978} 2979 2980define <32 x i8> @shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55(<32 x i8> %a, <32 x i8> %b) { 2981; AVX1-LABEL: shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55: 2982; AVX1: # %bb.0: 2983; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 2984; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 2985; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 2986; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] 2987; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2988; AVX1-NEXT: retq 2989; 2990; AVX2-LABEL: shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55: 2991; AVX2: # %bb.0: 2992; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[1,1,2,3] 2993; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,1,2,3] 2994; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] 2995; AVX2-NEXT: retq 2996; 2997; AVX512VLBW-LABEL: shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55: 2998; AVX512VLBW: # %bb.0: 2999; 
AVX512VLBW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[1,1,2,3] 3000; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,1,2,3] 3001; AVX512VLBW-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] 3002; AVX512VLBW-NEXT: retq 3003; 3004; AVX512VLVBMI-LABEL: shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55: 3005; AVX512VLVBMI: # %bb.0: 3006; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} ymm2 = [8,40,9,41,10,42,11,43,12,44,13,45,14,46,15,47,16,48,17,49,18,50,19,51,20,52,21,53,22,54,23,55] 3007; AVX512VLVBMI-NEXT: vpermt2b %ymm1, %ymm2, %ymm0 3008; AVX512VLVBMI-NEXT: retq 3009; 3010; XOPAVX1-LABEL: shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55: 3011; XOPAVX1: # %bb.0: 3012; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 3013; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 3014; XOPAVX1-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 3015; XOPAVX1-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] 3016; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 3017; XOPAVX1-NEXT: retq 3018; 3019; XOPAVX2-LABEL: shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55: 3020; XOPAVX2: # %bb.0: 3021; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[1,1,2,3] 3022; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,1,2,3] 3023; XOPAVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = 
ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] 3024; XOPAVX2-NEXT: retq 3025 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55> 3026 ret <32 x i8> %shuffle 3027} 3028 3029define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_16_17_16_16_16_16_16_16_16_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) { 3030; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_16_17_16_16_16_16_16_16_16_16_16_16_16_16_16_16: 3031; AVX1: # %bb.0: 3032; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0] 3033; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3034; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0] 3035; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3036; AVX1-NEXT: retq 3037; 3038; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_16_17_16_16_16_16_16_16_16_16_16_16_16_16_16_16: 3039; AVX2OR512VL: # %bb.0: 3040; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,16,17,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 3041; AVX2OR512VL-NEXT: retq 3042; 3043; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_16_17_16_16_16_16_16_16_16_16_16_16_16_16_16_16: 3044; XOPAVX1: # %bb.0: 3045; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0] 3046; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3047; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0] 3048; XOPAVX1-NEXT: vinsertf128 $1, 
%xmm0, %ymm1, %ymm0 3049; XOPAVX1-NEXT: retq 3050; 3051; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_16_17_16_16_16_16_16_16_16_16_16_16_16_16_16_16: 3052; XOPAVX2: # %bb.0: 3053; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,16,17,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 3054; XOPAVX2-NEXT: retq 3055 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 16, i32 17, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16> 3056 ret <32 x i8> %shuffle 3057} 3058 3059define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_16_16_18_16_16_16_16_16_16_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) { 3060; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_16_16_18_16_16_16_16_16_16_16_16_16_16_16_16_16: 3061; AVX1: # %bb.0: 3062; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0] 3063; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3064; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0] 3065; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3066; AVX1-NEXT: retq 3067; 3068; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_16_16_18_16_16_16_16_16_16_16_16_16_16_16_16_16: 3069; AVX2OR512VL: # %bb.0: 3070; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,16,16,18,16,16,16,16,16,16,16,16,16,16,16,16,16] 3071; AVX2OR512VL-NEXT: retq 3072; 3073; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_16_16_18_16_16_16_16_16_16_16_16_16_16_16_16_16: 3074; XOPAVX1: # %bb.0: 3075; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0] 3076; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3077; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = 
xmm0[0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0] 3078; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3079; XOPAVX1-NEXT: retq 3080; 3081; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_16_16_18_16_16_16_16_16_16_16_16_16_16_16_16_16: 3082; XOPAVX2: # %bb.0: 3083; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,16,16,18,16,16,16,16,16,16,16,16,16,16,16,16,16] 3084; XOPAVX2-NEXT: retq 3085 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 16, i32 16, i32 18, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16> 3086 ret <32 x i8> %shuffle 3087} 3088 3089define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_16_16_16_16_16_16_16_23_16_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) { 3090; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_16_16_16_16_16_16_16_23_16_16_16_16_16_16_16_16: 3091; AVX1: # %bb.0: 3092; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0] 3093; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3094; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0] 3095; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3096; AVX1-NEXT: retq 3097; 3098; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_16_16_16_16_16_16_16_23_16_16_16_16_16_16_16_16: 3099; AVX2OR512VL: # %bb.0: 3100; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,16,16,16,16,16,16,16,23,16,16,16,16,16,16,16,16] 3101; AVX2OR512VL-NEXT: retq 3102; 3103; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_16_16_16_16_16_16_16_23_16_16_16_16_16_16_16_16: 3104; XOPAVX1: # %bb.0: 3105; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0] 3106; XOPAVX1-NEXT: vextractf128 $1, %ymm0, 
%xmm0 3107; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0] 3108; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3109; XOPAVX1-NEXT: retq 3110; 3111; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_16_16_16_16_16_16_16_23_16_16_16_16_16_16_16_16: 3112; XOPAVX2: # %bb.0: 3113; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,16,16,16,16,16,16,16,23,16,16,16,16,16,16,16,16] 3114; XOPAVX2-NEXT: retq 3115 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 23, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16> 3116 ret <32 x i8> %shuffle 3117} 3118 3119define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_24_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) { 3120; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_24_16_16_16_16_16_16_16: 3121; AVX1: # %bb.0: 3122; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0] 3123; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3124; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0] 3125; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3126; AVX1-NEXT: retq 3127; 3128; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_24_16_16_16_16_16_16_16: 3129; AVX2OR512VL: # %bb.0: 3130; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,24,16,16,16,16,16,16,16] 3131; AVX2OR512VL-NEXT: retq 3132; 3133; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_24_16_16_16_16_16_16_16: 3134; XOPAVX1: # %bb.0: 3135; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = 
xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0] 3136; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3137; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0] 3138; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3139; XOPAVX1-NEXT: retq 3140; 3141; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_24_16_16_16_16_16_16_16: 3142; XOPAVX2: # %bb.0: 3143; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,24,16,16,16,16,16,16,16] 3144; XOPAVX2-NEXT: retq 3145 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 24, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16> 3146 ret <32 x i8> %shuffle 3147} 3148 3149define <32 x i8> @shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_30_16(<32 x i8> %a, <32 x i8> %b) { 3150; AVX1-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_30_16: 3151; AVX1: # %bb.0: 3152; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0] 3153; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3154; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,14,0] 3155; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3156; AVX1-NEXT: retq 3157; 3158; AVX2OR512VL-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_30_16: 3159; AVX2OR512VL: # %bb.0: 3160; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,30,16] 3161; AVX2OR512VL-NEXT: retq 3162; 3163; XOPAVX1-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_30_16: 3164; XOPAVX1: # 
%bb.0: 3165; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0] 3166; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3167; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,14,0] 3168; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3169; XOPAVX1-NEXT: retq 3170; 3171; XOPAVX2-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_30_16: 3172; XOPAVX2: # %bb.0: 3173; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,30,16] 3174; XOPAVX2-NEXT: retq 3175 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 30, i32 16> 3176 ret <32 x i8> %shuffle 3177} 3178 3179define <32 x i8> @shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_31(<32 x i8> %a, <32 x i8> %b) { 3180; AVX1-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_31: 3181; AVX1: # %bb.0: 3182; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0] 3183; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3184; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15] 3185; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3186; AVX1-NEXT: retq 3187; 3188; AVX2OR512VL-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_31: 3189; AVX2OR512VL: # %bb.0: 3190; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,31] 3191; AVX2OR512VL-NEXT: retq 3192; 3193; XOPAVX1-LABEL: 
shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_31: 3194; XOPAVX1: # %bb.0: 3195; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0] 3196; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3197; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15] 3198; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3199; XOPAVX1-NEXT: retq 3200; 3201; XOPAVX2-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_31: 3202; XOPAVX2: # %bb.0: 3203; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,31] 3204; XOPAVX2-NEXT: retq 3205 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 31> 3206 ret <32 x i8> %shuffle 3207} 3208 3209define <32 x i8> @shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_28_28_28_28_24_24_24_24_20_20_20_20_16_16_16_16(<32 x i8> %a, <32 x i8> %b) { 3210; AVX1-LABEL: shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_28_28_28_28_24_24_24_24_20_20_20_20_16_16_16_16: 3211; AVX1: # %bb.0: 3212; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12] 3213; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3214; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,12,12,12,8,8,8,8,4,4,4,4,0,0,0,0] 3215; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3216; AVX1-NEXT: retq 3217; 3218; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_28_28_28_28_24_24_24_24_20_20_20_20_16_16_16_16: 3219; AVX2OR512VL: # %bb.0: 3220; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12,28,28,28,28,24,24,24,24,20,20,20,20,16,16,16,16] 3221; AVX2OR512VL-NEXT: retq 3222; 3223; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_28_28_28_28_24_24_24_24_20_20_20_20_16_16_16_16: 3224; XOPAVX1: # %bb.0: 3225; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12] 3226; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3227; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,12,12,12,8,8,8,8,4,4,4,4,0,0,0,0] 3228; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3229; XOPAVX1-NEXT: retq 3230; 3231; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_28_28_28_28_24_24_24_24_20_20_20_20_16_16_16_16: 3232; XOPAVX2: # %bb.0: 3233; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12,28,28,28,28,24,24,24,24,20,20,20,20,16,16,16,16] 3234; XOPAVX2-NEXT: retq 3235 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 28, i32 28, i32 28, i32 28, i32 24, i32 24, i32 24, i32 24, i32 20, i32 20, i32 20, i32 20, i32 16, i32 16, i32 16, i32 16> 3236 ret <32 x i8> %shuffle 3237} 3238 3239define <32 x i8> @shuffle_v32i8_08_08_08_08_08_08_08_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24(<32 x i8> %a, <32 x i8> %b) { 3240; AVX1-LABEL: shuffle_v32i8_08_08_08_08_08_08_08_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24: 3241; AVX1: # %bb.0: 3242; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[8,8,8,8,8,8,8,8,0,0,0,0,0,0,0,0] 3243; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3244; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8] 3245; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3246; AVX1-NEXT: retq 3247; 3248; AVX2OR512VL-LABEL: shuffle_v32i8_08_08_08_08_08_08_08_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24: 3249; 
AVX2OR512VL: # %bb.0: 3250; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,8,8,8,8,8,8,8,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24] 3251; AVX2OR512VL-NEXT: retq 3252; 3253; XOPAVX1-LABEL: shuffle_v32i8_08_08_08_08_08_08_08_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24: 3254; XOPAVX1: # %bb.0: 3255; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[8,8,8,8,8,8,8,8,0,0,0,0,0,0,0,0] 3256; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3257; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8] 3258; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3259; XOPAVX1-NEXT: retq 3260; 3261; XOPAVX2-LABEL: shuffle_v32i8_08_08_08_08_08_08_08_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24: 3262; XOPAVX2: # %bb.0: 3263; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,8,8,8,8,8,8,8,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24] 3264; XOPAVX2-NEXT: retq 3265 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24> 3266 ret <32 x i8> %shuffle 3267} 3268 3269define <32 x i8> @shuffle_v32i8_00_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_16_16_16_16_uu_uu_uu_uu_uu_16_16_16_16_16_30_16(<32 x i8> %a, <32 x i8> %b) { 3270; AVX1-LABEL: shuffle_v32i8_00_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_16_16_16_16_uu_uu_uu_uu_uu_16_16_16_16_16_30_16: 3271; AVX1: # %bb.0: 3272; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 3273; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,0,0,0,u,u,u,u,u,0,0,0,0,0,14,0] 3274; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 3275; AVX1-NEXT: retq 3276; 3277; AVX2OR512VL-LABEL: shuffle_v32i8_00_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_16_16_16_16_uu_uu_uu_uu_uu_16_16_16_16_16_30_16: 3278; AVX2OR512VL: # %bb.0: 3279; 
AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,16,16,16,16,u,u,u,u,u,16,16,16,16,16,30,16] 3280; AVX2OR512VL-NEXT: retq 3281; 3282; XOPAVX1-LABEL: shuffle_v32i8_00_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_16_16_16_16_uu_uu_uu_uu_uu_16_16_16_16_16_30_16: 3283; XOPAVX1: # %bb.0: 3284; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 3285; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,0,0,0,u,u,u,u,u,0,0,0,0,0,14,0] 3286; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 3287; XOPAVX1-NEXT: retq 3288; 3289; XOPAVX2-LABEL: shuffle_v32i8_00_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_16_16_16_16_uu_uu_uu_uu_uu_16_16_16_16_16_30_16: 3290; XOPAVX2: # %bb.0: 3291; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,16,16,16,16,u,u,u,u,u,16,16,16,16,16,30,16] 3292; XOPAVX2-NEXT: retq 3293 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 16, i32 16, i32 16, i32 16, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 16, i32 16, i32 16, i32 16, i32 16, i32 30, i32 16> 3294 ret <32 x i8> %shuffle 3295} 3296 3297define <32 x i8> @shuffle_v32i8_uu_14_uu_uu_00_00_00_00_00_00_00_00_00_00_00_00_16_16_uu_16_uu_uu_uu_uu_16_16_16_16_16_16_30_16(<32 x i8> %a, <32 x i8> %b) { 3298; AVX1-LABEL: shuffle_v32i8_uu_14_uu_uu_00_00_00_00_00_00_00_00_00_00_00_00_16_16_uu_16_uu_uu_uu_uu_16_16_16_16_16_16_30_16: 3299; AVX1: # %bb.0: 3300; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[14,14,1,1,0,0,0,0,0,0,0,0,0,0,0,0] 3301; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3302; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,u,0,u,u,u,u,0,0,0,0,0,0,14,0] 3303; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3304; AVX1-NEXT: retq 3305; 3306; AVX2OR512VL-LABEL: 
shuffle_v32i8_uu_14_uu_uu_00_00_00_00_00_00_00_00_00_00_00_00_16_16_uu_16_uu_uu_uu_uu_16_16_16_16_16_16_30_16: 3307; AVX2OR512VL: # %bb.0: 3308; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,14,u,u,0,0,0,0,0,0,0,0,0,0,0,0,16,16,u,16,u,u,u,u,16,16,16,16,16,16,30,16] 3309; AVX2OR512VL-NEXT: retq 3310; 3311; XOPAVX1-LABEL: shuffle_v32i8_uu_14_uu_uu_00_00_00_00_00_00_00_00_00_00_00_00_16_16_uu_16_uu_uu_uu_uu_16_16_16_16_16_16_30_16: 3312; XOPAVX1: # %bb.0: 3313; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[14,14,1,1,0,0,0,0,0,0,0,0,0,0,0,0] 3314; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3315; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,u,0,u,u,u,u,0,0,0,0,0,0,14,0] 3316; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3317; XOPAVX1-NEXT: retq 3318; 3319; XOPAVX2-LABEL: shuffle_v32i8_uu_14_uu_uu_00_00_00_00_00_00_00_00_00_00_00_00_16_16_uu_16_uu_uu_uu_uu_16_16_16_16_16_16_30_16: 3320; XOPAVX2: # %bb.0: 3321; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,14,u,u,0,0,0,0,0,0,0,0,0,0,0,0,16,16,u,16,u,u,u,u,16,16,16,16,16,16,30,16] 3322; XOPAVX2-NEXT: retq 3323 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 poison, i32 14, i32 poison, i32 poison, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 poison, i32 16, i32 poison, i32 poison, i32 poison, i32 poison, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 30, i32 16> 3324 ret <32 x i8> %shuffle 3325} 3326 3327define <32 x i8> @shuffle_v32i8_00_00_00_uu_uu_uu_04_uu_08_08_08_08_uu_uu_12_uu_28_28_28_28_uu_uu_uu_24_20_20_20_20_16_16_16_16(<32 x i8> %a, <32 x i8> %b) { 3328; AVX1-LABEL: shuffle_v32i8_00_00_00_uu_uu_uu_04_uu_08_08_08_08_uu_uu_12_uu_28_28_28_28_uu_uu_uu_24_20_20_20_20_16_16_16_16: 3329; AVX1: # %bb.0: 3330; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12] 3331; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3332; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,12,12,12,8,8,8,8,4,4,4,4,0,0,0,0] 3333; AVX1-NEXT: 
vinsertf128 $1, %xmm0, %ymm1, %ymm0 3334; AVX1-NEXT: retq 3335; 3336; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_uu_uu_uu_04_uu_08_08_08_08_uu_uu_12_uu_28_28_28_28_uu_uu_uu_24_20_20_20_20_16_16_16_16: 3337; AVX2OR512VL: # %bb.0: 3338; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,u,u,u,4,u,8,8,8,8,u,u,12,u,28,28,28,28,u,u,u,24,20,20,20,20,16,16,16,16] 3339; AVX2OR512VL-NEXT: retq 3340; 3341; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_uu_uu_uu_04_uu_08_08_08_08_uu_uu_12_uu_28_28_28_28_uu_uu_uu_24_20_20_20_20_16_16_16_16: 3342; XOPAVX1: # %bb.0: 3343; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12] 3344; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3345; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,12,12,12,8,8,8,8,4,4,4,4,0,0,0,0] 3346; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3347; XOPAVX1-NEXT: retq 3348; 3349; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_uu_uu_uu_04_uu_08_08_08_08_uu_uu_12_uu_28_28_28_28_uu_uu_uu_24_20_20_20_20_16_16_16_16: 3350; XOPAVX2: # %bb.0: 3351; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,u,u,u,4,u,8,8,8,8,u,u,12,u,28,28,28,28,u,u,u,24,20,20,20,20,16,16,16,16] 3352; XOPAVX2-NEXT: retq 3353 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 4, i32 poison, i32 8, i32 8, i32 8, i32 8, i32 poison, i32 poison, i32 12, i32 poison, i32 28, i32 28, i32 28, i32 28, i32 poison, i32 poison, i32 poison, i32 24, i32 20, i32 20, i32 20, i32 20, i32 16, i32 16, i32 16, i32 16> 3354 ret <32 x i8> %shuffle 3355} 3356 3357define <32 x i8> @shuffle_v32i8_08_08_08_08_08_08_08_08_uu_uu_uu_uu_uu_uu_uu_uu_16_16_16_uu_uu_uu_uu_uu_uu_uu_24_24_24_24_24_24(<32 x i8> %a, <32 x i8> %b) { 3358; AVX1-LABEL: shuffle_v32i8_08_08_08_08_08_08_08_08_uu_uu_uu_uu_uu_uu_uu_uu_16_16_16_uu_uu_uu_uu_uu_uu_uu_24_24_24_24_24_24: 3359; AVX1: # %bb.0: 3360; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 3361; AVX1-NEXT: vpshuflw 
{{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] 3362; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3363; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,8,8,8,8,8,8,8,8,8,8,8,8] 3364; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3365; AVX1-NEXT: retq 3366; 3367; AVX2OR512VL-LABEL: shuffle_v32i8_08_08_08_08_08_08_08_08_uu_uu_uu_uu_uu_uu_uu_uu_16_16_16_uu_uu_uu_uu_uu_uu_uu_24_24_24_24_24_24: 3368; AVX2OR512VL: # %bb.0: 3369; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,8,8,8,8,8,8,8,u,u,u,u,u,u,u,u,16,16,16,u,u,u,u,u,u,u,24,24,24,24,24,24] 3370; AVX2OR512VL-NEXT: retq 3371; 3372; XOPAVX1-LABEL: shuffle_v32i8_08_08_08_08_08_08_08_08_uu_uu_uu_uu_uu_uu_uu_uu_16_16_16_uu_uu_uu_uu_uu_uu_uu_24_24_24_24_24_24: 3373; XOPAVX1: # %bb.0: 3374; XOPAVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 3375; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] 3376; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3377; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,8,8,8,8,8,8,8,8,8,8,8,8] 3378; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3379; XOPAVX1-NEXT: retq 3380; 3381; XOPAVX2-LABEL: shuffle_v32i8_08_08_08_08_08_08_08_08_uu_uu_uu_uu_uu_uu_uu_uu_16_16_16_uu_uu_uu_uu_uu_uu_uu_24_24_24_24_24_24: 3382; XOPAVX2: # %bb.0: 3383; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,8,8,8,8,8,8,8,u,u,u,u,u,u,u,u,16,16,16,u,u,u,u,u,u,u,24,24,24,24,24,24] 3384; XOPAVX2-NEXT: retq 3385 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 16, i32 16, i32 16, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24> 3386 ret <32 x i8> %shuffle 3387} 3388 3389define <32 x i8> @shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39(<32 x i8> %a, <32 x i8> %b) { 3390; 
AVX1-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39: 3391; AVX1: # %bb.0: 3392; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 3393; AVX1-NEXT: vpshufb {{.*#+}} xmm3 = xmm2[u,u,4,u,1,6],zero,zero,xmm2[0],zero,xmm2[11,u],zero,zero,zero,zero 3394; AVX1-NEXT: vpshufb {{.*#+}} xmm4 = xmm1[u,u],zero,xmm1[u],zero,zero,xmm1[5,0],zero,xmm1[10],zero,xmm1[u,4,2,4,7] 3395; AVX1-NEXT: vpor %xmm3, %xmm4, %xmm3 3396; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 3397; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm5 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3],xmm4[4],xmm0[4],xmm4[5],xmm0[5],xmm4[6],xmm0[6],xmm4[7],xmm0[7] 3398; AVX1-NEXT: vpshufb {{.*#+}} xmm5 = xmm5[8,6,u,6,u,u,u,u,u,u,u,15,u,u,u,u] 3399; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [0,0,255,0,255,255,255,255,255,255,255,0,255,255,255,255] 3400; AVX1-NEXT: vpblendvb %xmm6, %xmm3, %xmm5, %xmm3 3401; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = zero,zero,xmm2[u,u],zero,zero,xmm2[12],zero,xmm2[u,u,u],zero,zero,xmm2[u,0,3] 3402; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[10,13,u,u,3,3],zero,xmm1[8,u,u,u,12,1,u],zero,zero 3403; AVX1-NEXT: vpor %xmm2, %xmm1, %xmm1 3404; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = xmm4[u,u],zero,zero,xmm4[u,u,u,u,1,6,13,u,u],zero,xmm4[u,u] 3405; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,u,12,13,u,u,u,u],zero,zero,zero,xmm0[u,u,12,u,u] 3406; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 3407; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,0,0,255,255,255,255,0,0,0,255,255,0,255,255] 3408; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 3409; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 3410; AVX1-NEXT: retq 3411; 3412; AVX2-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39: 3413; AVX2: # %bb.0: 3414; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,12,12,13,1,6,13,7,u,u,u,u,u,u,u,u,u,u,u,u,17,22,29,23,20,19,u,19,u,u,u,u] 3415; AVX2-NEXT: vpmovsxbq {{.*#+}} ymm2 = [0,5,6,1] 3416; AVX2-NEXT: vpermd 
%ymm0, %ymm2, %ymm0 3417; AVX2-NEXT: vpshufb {{.*#+}} ymm2 = ymm1[10,13],zero,zero,ymm1[3,3],zero,ymm1[8],zero,zero,zero,ymm1[12,1],zero,zero,zero,zero,zero,ymm1[20],zero,ymm1[17,22],zero,zero,ymm1[16],zero,ymm1[27],zero,zero,zero,zero,zero 3418; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1] 3419; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,ymm1[12],zero,zero,zero,zero,zero,zero,zero,ymm1[0,3],zero,zero,zero,zero,zero,zero,ymm1[21,16],zero,ymm1[26],zero,zero,ymm1[20,18,20,23] 3420; AVX2-NEXT: vpor %ymm1, %ymm2, %ymm1 3421; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,0,255,255,255,255,0,0,0,255,255,0,255,255,0,0,255,0,255,255,255,255,255,255,255,0,255,255,255,255] 3422; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 3423; AVX2-NEXT: retq 3424; 3425; AVX512VLBW-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39: 3426; AVX512VLBW: # %bb.0: 3427; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,12,12,13,1,6,13,7,u,u,u,u,u,u,u,u,u,u,u,u,17,22,29,23,20,19,u,19,u,u,u,u] 3428; AVX512VLBW-NEXT: vpmovsxbq {{.*#+}} ymm2 = [0,5,6,1] 3429; AVX512VLBW-NEXT: vpermd %ymm0, %ymm2, %ymm2 3430; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm1[10,13],zero,zero,ymm1[3,3],zero,ymm1[8],zero,zero,zero,ymm1[12,1],zero,zero,zero,zero,zero,ymm1[20],zero,ymm1[17,22],zero,zero,ymm1[16],zero,ymm1[27],zero,zero,zero,zero,zero 3431; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1] 3432; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,ymm1[12],zero,zero,zero,zero,zero,zero,zero,ymm1[0,3],zero,zero,zero,zero,zero,zero,ymm1[21,16],zero,ymm1[26],zero,zero,ymm1[20,18,20,23] 3433; AVX512VLBW-NEXT: vpor %ymm0, %ymm1, %ymm0 3434; AVX512VLBW-NEXT: movl $134948620, %eax # imm = 0x80B270C 3435; AVX512VLBW-NEXT: kmovd %eax, %k1 3436; AVX512VLBW-NEXT: vmovdqu8 %ymm2, %ymm0 {%k1} 3437; AVX512VLBW-NEXT: retq 3438; 3439; AVX512VLVBMI-LABEL: 
shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39: 3440; AVX512VLVBMI: # %bb.0: 3441; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} ymm2 = [10,13,44,45,3,3,28,8,49,54,61,12,1,44,16,19,52,51,20,51,17,22,5,0,16,10,27,39,4,2,4,7] 3442; AVX512VLVBMI-NEXT: vpermi2b %ymm0, %ymm1, %ymm2 3443; AVX512VLVBMI-NEXT: vmovdqa %ymm2, %ymm0 3444; AVX512VLVBMI-NEXT: retq 3445; 3446; XOPAVX1-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39: 3447; XOPAVX1: # %bb.0: 3448; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 3449; XOPAVX1-NEXT: vpperm {{.*#+}} xmm3 = xmm1[u,u],xmm2[4],xmm1[u],xmm2[1,6],xmm1[5,0],xmm2[0],xmm1[10],xmm2[11],xmm1[u,4,2,4,7] 3450; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 3451; XOPAVX1-NEXT: vpperm {{.*#+}} xmm5 = xmm4[4,3,u,3,u,u,u,u,u,u,u],xmm0[7],xmm4[u,u,u,u] 3452; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [0,0,255,0,255,255,255,255,255,255,255,0,255,255,255,255] 3453; XOPAVX1-NEXT: vpblendvb %xmm6, %xmm3, %xmm5, %xmm3 3454; XOPAVX1-NEXT: vpperm {{.*#+}} xmm1 = xmm1[10,13,u,u,3,3],xmm2[12],xmm1[8,u,u,u,12,1,u],xmm2[0,3] 3455; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[u,u,12,13,u,u,u,u],xmm4[1,6,13],xmm0[u,u,12,u,u] 3456; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,0,0,255,255,255,255,0,0,0,255,255,0,255,255] 3457; XOPAVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 3458; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 3459; XOPAVX1-NEXT: retq 3460; 3461; XOPAVX2-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39: 3462; XOPAVX2: # %bb.0: 3463; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,12,12,13,1,6,13,7,u,u,u,u,u,u,u,u,u,u,u,u,17,22,29,23,20,19,u,19,u,u,u,u] 3464; XOPAVX2-NEXT: vpmovsxbq {{.*#+}} ymm2 = [0,5,6,1] 3465; XOPAVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0 3466; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm2 = 
ymm1[10,13],zero,zero,ymm1[3,3],zero,ymm1[8],zero,zero,zero,ymm1[12,1],zero,zero,zero,zero,zero,ymm1[20],zero,ymm1[17,22],zero,zero,ymm1[16],zero,ymm1[27],zero,zero,zero,zero,zero 3467; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1] 3468; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,ymm1[12],zero,zero,zero,zero,zero,zero,zero,ymm1[0,3],zero,zero,zero,zero,zero,zero,ymm1[21,16],zero,ymm1[26],zero,zero,ymm1[20,18,20,23] 3469; XOPAVX2-NEXT: vpor %ymm1, %ymm2, %ymm1 3470; XOPAVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,0,255,255,255,255,0,0,0,255,255,0,255,255,0,0,255,0,255,255,255,255,255,255,255,0,255,255,255,255] 3471; XOPAVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 3472; XOPAVX2-NEXT: retq 3473 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 42, i32 45, i32 12, i32 13, i32 35, i32 35, i32 60, i32 40, i32 17, i32 22, i32 29, i32 44, i32 33, i32 12, i32 48, i32 51, i32 20, i32 19, i32 52, i32 19, i32 49, i32 54, i32 37, i32 32, i32 48, i32 42, i32 59, i32 7, i32 36, i32 34, i32 36, i32 39> 3474 ret <32 x i8> %shuffle 3475} 3476 3477define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40(<32 x i8> %a, <32 x i8> %b) { 3478; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40: 3479; AVX1: # %bb.0: 3480; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8] 3481; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 3482; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 3483; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 3484; AVX1-NEXT: retq 3485; 3486; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40: 3487; AVX2: # %bb.0: 3488; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 3489; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24] 3490; AVX2-NEXT: 
retq 3491; 3492; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40: 3493; AVX512VLBW: # %bb.0: 3494; AVX512VLBW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 3495; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24] 3496; AVX512VLBW-NEXT: retq 3497; 3498; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40: 3499; AVX512VLVBMI: # %bb.0: 3500; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,32,32,32,32,32,32,32,32,40,40,40,40,40,40,40,40] 3501; AVX512VLVBMI-NEXT: vpermt2b %ymm1, %ymm2, %ymm0 3502; AVX512VLVBMI-NEXT: retq 3503; 3504; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40: 3505; XOPAVX1: # %bb.0: 3506; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8] 3507; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 3508; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 3509; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 3510; XOPAVX1-NEXT: retq 3511; 3512; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40: 3513; XOPAVX2: # %bb.0: 3514; XOPAVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 3515; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24] 3516; XOPAVX2-NEXT: retq 3517 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40> 3518 ret <32 x i8> %shuffle 3519} 3520 3521define <32 x i8> 
@shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40(<32 x i8> %a, <32 x i8> %b) { 3522; AVX1-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40: 3523; AVX1: # %bb.0: 3524; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8] 3525; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 3526; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3527; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 3528; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 3529; AVX1-NEXT: retq 3530; 3531; AVX2-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40: 3532; AVX2: # %bb.0: 3533; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] 3534; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24] 3535; AVX2-NEXT: retq 3536; 3537; AVX512VLBW-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40: 3538; AVX512VLBW: # %bb.0: 3539; AVX512VLBW-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] 3540; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24] 3541; AVX512VLBW-NEXT: retq 3542; 3543; AVX512VLVBMI-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40: 3544; AVX512VLVBMI: # %bb.0: 3545; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} ymm2 = [16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24,32,32,32,32,32,32,32,32,40,40,40,40,40,40,40,40] 3546; AVX512VLVBMI-NEXT: vpermt2b %ymm1, %ymm2, %ymm0 3547; AVX512VLVBMI-NEXT: retq 3548; 3549; XOPAVX1-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40: 3550; XOPAVX1: # %bb.0: 3551; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8] 3552; 
XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 3553; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3554; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 3555; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 3556; XOPAVX1-NEXT: retq 3557; 3558; XOPAVX2-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40: 3559; XOPAVX2: # %bb.0: 3560; XOPAVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] 3561; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24] 3562; XOPAVX2-NEXT: retq 3563 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40> 3564 ret <32 x i8> %shuffle 3565} 3566 3567define <32 x i8> @shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56(<32 x i8> %a, <32 x i8> %b) { 3568; AVX1-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56: 3569; AVX1: # %bb.0: 3570; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 3571; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8] 3572; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 3573; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3574; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 3575; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 3576; AVX1-NEXT: retq 3577; 3578; AVX2-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56: 3579; AVX2: # %bb.0: 3580; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] 3581; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24] 3582; AVX2-NEXT: retq 3583; 3584; 
AVX512VLBW-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56: 3585; AVX512VLBW: # %bb.0: 3586; AVX512VLBW-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] 3587; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24] 3588; AVX512VLBW-NEXT: retq 3589; 3590; AVX512VLVBMI-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56: 3591; AVX512VLVBMI: # %bb.0: 3592; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} ymm2 = [16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24,48,48,48,48,48,48,48,48,56,56,56,56,56,56,56,56] 3593; AVX512VLVBMI-NEXT: vpermt2b %ymm1, %ymm2, %ymm0 3594; AVX512VLVBMI-NEXT: retq 3595; 3596; XOPAVX1-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56: 3597; XOPAVX1: # %bb.0: 3598; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 3599; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8] 3600; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 3601; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3602; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 3603; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 3604; XOPAVX1-NEXT: retq 3605; 3606; XOPAVX2-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56: 3607; XOPAVX2: # %bb.0: 3608; XOPAVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] 3609; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24] 3610; XOPAVX2-NEXT: retq 3611 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 56, i32 56, i32 56, i32 56, i32 56, i32 
56, i32 56, i32 56> 3612 ret <32 x i8> %shuffle 3613} 3614 3615define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56(<32 x i8> %a, <32 x i8> %b) { 3616; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56: 3617; AVX1: # %bb.0: 3618; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 3619; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8] 3620; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 3621; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 3622; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 3623; AVX1-NEXT: retq 3624; 3625; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56: 3626; AVX2: # %bb.0: 3627; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] 3628; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24] 3629; AVX2-NEXT: retq 3630; 3631; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56: 3632; AVX512VLBW: # %bb.0: 3633; AVX512VLBW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] 3634; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24] 3635; AVX512VLBW-NEXT: retq 3636; 3637; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56: 3638; AVX512VLVBMI: # %bb.0: 3639; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,48,48,48,48,48,48,48,48,56,56,56,56,56,56,56,56] 3640; AVX512VLVBMI-NEXT: vpermt2b %ymm1, %ymm2, %ymm0 3641; AVX512VLVBMI-NEXT: retq 3642; 3643; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56: 3644; XOPAVX1: # %bb.0: 3645; 
XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 3646; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8] 3647; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 3648; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 3649; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 3650; XOPAVX1-NEXT: retq 3651; 3652; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56: 3653; XOPAVX2: # %bb.0: 3654; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] 3655; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24] 3656; XOPAVX2-NEXT: retq 3657 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56> 3658 ret <32 x i8> %shuffle 3659} 3660 3661define <32 x i8> @shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47(<32 x i8> %a, <32 x i8> %b) { 3662; AVX1-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47: 3663; AVX1: # %bb.0: 3664; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] 3665; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 3666; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 3667; AVX1-NEXT: retq 3668; 3669; AVX2-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47: 3670; AVX2: # %bb.0: 3671; AVX2-NEXT: vpunpckhbw {{.*#+}} xmm2 = 
xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] 3672; AVX2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 3673; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 3674; AVX2-NEXT: retq 3675; 3676; AVX512VLBW-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47: 3677; AVX512VLBW: # %bb.0: 3678; AVX512VLBW-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] 3679; AVX512VLBW-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 3680; AVX512VLBW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 3681; AVX512VLBW-NEXT: retq 3682; 3683; AVX512VLVBMI-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47: 3684; AVX512VLVBMI: # %bb.0: 3685; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} ymm2 = [0,32,1,33,2,34,3,35,4,36,5,37,6,38,7,39,8,40,9,41,10,42,11,43,12,44,13,45,14,46,15,47] 3686; AVX512VLVBMI-NEXT: vpermt2b %ymm1, %ymm2, %ymm0 3687; AVX512VLVBMI-NEXT: retq 3688; 3689; XOPAVX1-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47: 3690; XOPAVX1: # %bb.0: 3691; XOPAVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] 3692; XOPAVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 3693; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 3694; 
XOPAVX1-NEXT: retq 3695; 3696; XOPAVX2-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47: 3697; XOPAVX2: # %bb.0: 3698; XOPAVX2-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] 3699; XOPAVX2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 3700; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 3701; XOPAVX2-NEXT: retq 3702 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47> 3703 ret <32 x i8> %shuffle 3704} 3705 3706define <32 x i8> @shuffle_v32i8_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00_32_34_36_38_40_42_44_46_33_35_37_39_41_43_45_47(<32 x i8> %a, <32 x i8> %b) { 3707; AVX1-LABEL: shuffle_v32i8_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00_32_34_36_38_40_42_44_46_33_35_37_39_41_43_45_47: 3708; AVX1: # %bb.0: 3709; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0] 3710; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,2,4,6,8,10,12,14,1,3,5,7,9,11,13,15] 3711; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 3712; AVX1-NEXT: retq 3713; 3714; AVX2-LABEL: shuffle_v32i8_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00_32_34_36_38_40_42_44_46_33_35_37_39_41_43_45_47: 3715; AVX2: # %bb.0: 3716; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 3717; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,16,18,20,22,24,26,28,30,17,19,21,23,25,27,29,31] 3718; AVX2-NEXT: retq 3719; 3720; AVX512VLBW-LABEL: 
shuffle_v32i8_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00_32_34_36_38_40_42_44_46_33_35_37_39_41_43_45_47: 3721; AVX512VLBW: # %bb.0: 3722; AVX512VLBW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 3723; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,16,18,20,22,24,26,28,30,17,19,21,23,25,27,29,31] 3724; AVX512VLBW-NEXT: retq 3725; 3726; AVX512VLVBMI-LABEL: shuffle_v32i8_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00_32_34_36_38_40_42_44_46_33_35_37_39_41_43_45_47: 3727; AVX512VLVBMI: # %bb.0: 3728; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} ymm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,32,34,36,38,40,42,44,46,33,35,37,39,41,43,45,47] 3729; AVX512VLVBMI-NEXT: vpermt2b %ymm1, %ymm2, %ymm0 3730; AVX512VLVBMI-NEXT: retq 3731; 3732; XOPAVX1-LABEL: shuffle_v32i8_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00_32_34_36_38_40_42_44_46_33_35_37_39_41_43_45_47: 3733; XOPAVX1: # %bb.0: 3734; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0] 3735; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,2,4,6,8,10,12,14,1,3,5,7,9,11,13,15] 3736; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 3737; XOPAVX1-NEXT: retq 3738; 3739; XOPAVX2-LABEL: shuffle_v32i8_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00_32_34_36_38_40_42_44_46_33_35_37_39_41_43_45_47: 3740; XOPAVX2: # %bb.0: 3741; XOPAVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 3742; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,16,18,20,22,24,26,28,30,17,19,21,23,25,27,29,31] 3743; XOPAVX2-NEXT: retq 3744 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 32, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46, i32 33, i32 35, i32 37, i32 39, i32 41, i32 43, i32 45, i32 47> 3745 ret <32 x i8> %shuffle 3746} 3747 3748define <32 x i8> 
@shuffle_v32i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_32_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_48(<32 x i8> %a) { 3749; AVX1-LABEL: shuffle_v32i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_32_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_48: 3750; AVX1: # %bb.0: 3751; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0] 3752; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3753; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0] 3754; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3755; AVX1-NEXT: retq 3756; 3757; AVX2OR512VL-LABEL: shuffle_v32i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_32_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_48: 3758; AVX2OR512VL: # %bb.0: 3759; AVX2OR512VL-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16] 3760; AVX2OR512VL-NEXT: retq 3761; 3762; XOPAVX1-LABEL: shuffle_v32i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_32_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_48: 3763; XOPAVX1: # %bb.0: 3764; XOPAVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0] 3765; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3766; XOPAVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0] 3767; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3768; XOPAVX1-NEXT: retq 3769; 3770; XOPAVX2-LABEL: shuffle_v32i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_32_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_48: 3771; XOPAVX2: # %bb.0: 3772; XOPAVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16] 3773; 
XOPAVX2-NEXT: retq 3774 %shuffle = shufflevector <32 x i8> zeroinitializer, <32 x i8> %a, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 32, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 48> 3775 ret <32 x i8> %shuffle 3776} 3777 3778define <32 x i8> @shuffle_v32i8_47_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_63_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(<32 x i8> %a) { 3779; AVX1-LABEL: shuffle_v32i8_47_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_63_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 3780; AVX1: # %bb.0: 3781; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 3782; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3783; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 3784; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3785; AVX1-NEXT: retq 3786; 3787; AVX2OR512VL-LABEL: shuffle_v32i8_47_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_63_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 3788; AVX2OR512VL: # %bb.0: 3789; AVX2OR512VL-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 3790; AVX2OR512VL-NEXT: retq 3791; 3792; XOPAVX1-LABEL: shuffle_v32i8_47_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_63_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 3793; XOPAVX1: # %bb.0: 3794; XOPAVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 3795; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3796; XOPAVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 3797; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3798; XOPAVX1-NEXT: 
retq 3799; 3800; XOPAVX2-LABEL: shuffle_v32i8_47_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_63_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 3801; XOPAVX2: # %bb.0: 3802; XOPAVX2-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 3803; XOPAVX2-NEXT: retq 3804 %shuffle = shufflevector <32 x i8> zeroinitializer, <32 x i8> %a, <32 x i32> <i32 47, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 63, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 3805 ret <32 x i8> %shuffle 3806} 3807 3808; 3809; Shuffle to logical bit shifts 3810; 3811 3812define <32 x i8> @shuffle_v32i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14_zz_16_zz_18_zz_20_zz_22_zz_24_zz_26_zz_28_zz_30(<32 x i8> %a) { 3813; AVX1-LABEL: shuffle_v32i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14_zz_16_zz_18_zz_20_zz_22_zz_24_zz_26_zz_28_zz_30: 3814; AVX1: # %bb.0: 3815; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 3816; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3817; AVX1-NEXT: vpsllw $8, %xmm0, %xmm0 3818; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3819; AVX1-NEXT: retq 3820; 3821; AVX2OR512VL-LABEL: shuffle_v32i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14_zz_16_zz_18_zz_20_zz_22_zz_24_zz_26_zz_28_zz_30: 3822; AVX2OR512VL: # %bb.0: 3823; AVX2OR512VL-NEXT: vpsllw $8, %ymm0, %ymm0 3824; AVX2OR512VL-NEXT: retq 3825; 3826; XOPAVX1-LABEL: shuffle_v32i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14_zz_16_zz_18_zz_20_zz_22_zz_24_zz_26_zz_28_zz_30: 3827; XOPAVX1: # %bb.0: 3828; XOPAVX1-NEXT: vpsllw $8, %xmm0, %xmm1 3829; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3830; XOPAVX1-NEXT: vpsllw $8, %xmm0, %xmm0 3831; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3832; XOPAVX1-NEXT: retq 3833; 3834; XOPAVX2-LABEL: 
shuffle_v32i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14_zz_16_zz_18_zz_20_zz_22_zz_24_zz_26_zz_28_zz_30: 3835; XOPAVX2: # %bb.0: 3836; XOPAVX2-NEXT: vpsllw $8, %ymm0, %ymm0 3837; XOPAVX2-NEXT: retq 3838 %shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 0, i32 32, i32 2, i32 32, i32 4, i32 32, i32 6, i32 32, i32 8, i32 32, i32 10, i32 32, i32 12, i32 32, i32 14, i32 32, i32 16, i32 32, i32 18, i32 32, i32 20, i32 32, i32 22, i32 32, i32 24, i32 32, i32 26, i32 32, i32 28, i32 32, i32 30> 3839 ret <32 x i8> %shuffle 3840} 3841 3842define <32 x i8> @shuffle_v32i8_zz_zz_00_01_zz_zz_04_05_zz_zz_08_09_zz_zz_12_13_zz_zz_16_17_zz_zz_20_21_zz_zz_24_25_zz_zz_28_29(<32 x i8> %a) { 3843; AVX1-LABEL: shuffle_v32i8_zz_zz_00_01_zz_zz_04_05_zz_zz_08_09_zz_zz_12_13_zz_zz_16_17_zz_zz_20_21_zz_zz_24_25_zz_zz_28_29: 3844; AVX1: # %bb.0: 3845; AVX1-NEXT: vpslld $16, %xmm0, %xmm1 3846; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3847; AVX1-NEXT: vpslld $16, %xmm0, %xmm0 3848; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3849; AVX1-NEXT: retq 3850; 3851; AVX2OR512VL-LABEL: shuffle_v32i8_zz_zz_00_01_zz_zz_04_05_zz_zz_08_09_zz_zz_12_13_zz_zz_16_17_zz_zz_20_21_zz_zz_24_25_zz_zz_28_29: 3852; AVX2OR512VL: # %bb.0: 3853; AVX2OR512VL-NEXT: vpslld $16, %ymm0, %ymm0 3854; AVX2OR512VL-NEXT: retq 3855; 3856; XOPAVX1-LABEL: shuffle_v32i8_zz_zz_00_01_zz_zz_04_05_zz_zz_08_09_zz_zz_12_13_zz_zz_16_17_zz_zz_20_21_zz_zz_24_25_zz_zz_28_29: 3857; XOPAVX1: # %bb.0: 3858; XOPAVX1-NEXT: vpslld $16, %xmm0, %xmm1 3859; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3860; XOPAVX1-NEXT: vpslld $16, %xmm0, %xmm0 3861; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3862; XOPAVX1-NEXT: retq 3863; 3864; XOPAVX2-LABEL: shuffle_v32i8_zz_zz_00_01_zz_zz_04_05_zz_zz_08_09_zz_zz_12_13_zz_zz_16_17_zz_zz_20_21_zz_zz_24_25_zz_zz_28_29: 3865; XOPAVX2: # %bb.0: 3866; XOPAVX2-NEXT: vpslld $16, %ymm0, %ymm0 3867; XOPAVX2-NEXT: retq 3868 %shuffle = shufflevector <32 x i8> %a, <32 x i8> 
zeroinitializer, <32 x i32> <i32 32, i32 32, i32 0, i32 1, i32 32, i32 32, i32 4, i32 5, i32 32, i32 32, i32 8, i32 9, i32 32, i32 32, i32 12, i32 13, i32 32, i32 32, i32 16, i32 17, i32 32, i32 32, i32 20, i32 21, i32 32, i32 32, i32 24, i32 25, i32 32, i32 32, i32 28, i32 29> 3869 ret <32 x i8> %shuffle 3870} 3871 3872define <32 x i8> @shuffle_v32i8_zz_zz_zz_zz_zz_zz_00_01_zz_zz_zz_zz_zz_zz_08_09_zz_zz_zz_zz_zz_zz_16_17_zz_zz_zz_zz_zz_zz_24_25(<32 x i8> %a) { 3873; AVX1-LABEL: shuffle_v32i8_zz_zz_zz_zz_zz_zz_00_01_zz_zz_zz_zz_zz_zz_08_09_zz_zz_zz_zz_zz_zz_16_17_zz_zz_zz_zz_zz_zz_24_25: 3874; AVX1: # %bb.0: 3875; AVX1-NEXT: vpsllq $48, %xmm0, %xmm1 3876; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3877; AVX1-NEXT: vpsllq $48, %xmm0, %xmm0 3878; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3879; AVX1-NEXT: retq 3880; 3881; AVX2OR512VL-LABEL: shuffle_v32i8_zz_zz_zz_zz_zz_zz_00_01_zz_zz_zz_zz_zz_zz_08_09_zz_zz_zz_zz_zz_zz_16_17_zz_zz_zz_zz_zz_zz_24_25: 3882; AVX2OR512VL: # %bb.0: 3883; AVX2OR512VL-NEXT: vpsllq $48, %ymm0, %ymm0 3884; AVX2OR512VL-NEXT: retq 3885; 3886; XOPAVX1-LABEL: shuffle_v32i8_zz_zz_zz_zz_zz_zz_00_01_zz_zz_zz_zz_zz_zz_08_09_zz_zz_zz_zz_zz_zz_16_17_zz_zz_zz_zz_zz_zz_24_25: 3887; XOPAVX1: # %bb.0: 3888; XOPAVX1-NEXT: vpsllq $48, %xmm0, %xmm1 3889; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3890; XOPAVX1-NEXT: vpsllq $48, %xmm0, %xmm0 3891; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3892; XOPAVX1-NEXT: retq 3893; 3894; XOPAVX2-LABEL: shuffle_v32i8_zz_zz_zz_zz_zz_zz_00_01_zz_zz_zz_zz_zz_zz_08_09_zz_zz_zz_zz_zz_zz_16_17_zz_zz_zz_zz_zz_zz_24_25: 3895; XOPAVX2: # %bb.0: 3896; XOPAVX2-NEXT: vpsllq $48, %ymm0, %ymm0 3897; XOPAVX2-NEXT: retq 3898 %shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 0, i32 1, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 8, i32 9, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 32, i32 32, i32 32, i32 32, i32 32, i32 
32, i32 24, i32 25> 3899 ret <32 x i8> %shuffle 3900} 3901 3902define <32 x i8> @shuffle_v32i8_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31_zz(<32 x i8> %a) { 3903; AVX1-LABEL: shuffle_v32i8_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31_zz: 3904; AVX1: # %bb.0: 3905; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 3906; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3907; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 3908; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3909; AVX1-NEXT: retq 3910; 3911; AVX2OR512VL-LABEL: shuffle_v32i8_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31_zz: 3912; AVX2OR512VL: # %bb.0: 3913; AVX2OR512VL-NEXT: vpsrlw $8, %ymm0, %ymm0 3914; AVX2OR512VL-NEXT: retq 3915; 3916; XOPAVX1-LABEL: shuffle_v32i8_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31_zz: 3917; XOPAVX1: # %bb.0: 3918; XOPAVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 3919; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3920; XOPAVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 3921; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3922; XOPAVX1-NEXT: retq 3923; 3924; XOPAVX2-LABEL: shuffle_v32i8_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31_zz: 3925; XOPAVX2: # %bb.0: 3926; XOPAVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 3927; XOPAVX2-NEXT: retq 3928 %shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 1, i32 32, i32 3, i32 32, i32 5, i32 32, i32 7, i32 32, i32 9, i32 32, i32 11, i32 32, i32 13, i32 32, i32 15, i32 32, i32 17, i32 32, i32 19, i32 32, i32 21, i32 32, i32 23, i32 32, i32 25, i32 32, i32 27, i32 32, i32 29, i32 32, i32 31, i32 32> 3929 ret <32 x i8> %shuffle 3930} 3931 3932define <32 x i8> @shuffle_v32i8_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz_18_19_zz_zz_22_23_zz_zz_26_27_zz_zz_30_31_zz_zz(<32 x i8> %a) { 3933; AVX1-LABEL: 
shuffle_v32i8_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz_18_19_zz_zz_22_23_zz_zz_26_27_zz_zz_30_31_zz_zz: 3934; AVX1: # %bb.0: 3935; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1 3936; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3937; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0 3938; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3939; AVX1-NEXT: retq 3940; 3941; AVX2OR512VL-LABEL: shuffle_v32i8_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz_18_19_zz_zz_22_23_zz_zz_26_27_zz_zz_30_31_zz_zz: 3942; AVX2OR512VL: # %bb.0: 3943; AVX2OR512VL-NEXT: vpsrld $16, %ymm0, %ymm0 3944; AVX2OR512VL-NEXT: retq 3945; 3946; XOPAVX1-LABEL: shuffle_v32i8_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz_18_19_zz_zz_22_23_zz_zz_26_27_zz_zz_30_31_zz_zz: 3947; XOPAVX1: # %bb.0: 3948; XOPAVX1-NEXT: vpsrld $16, %xmm0, %xmm1 3949; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3950; XOPAVX1-NEXT: vpsrld $16, %xmm0, %xmm0 3951; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3952; XOPAVX1-NEXT: retq 3953; 3954; XOPAVX2-LABEL: shuffle_v32i8_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz_18_19_zz_zz_22_23_zz_zz_26_27_zz_zz_30_31_zz_zz: 3955; XOPAVX2: # %bb.0: 3956; XOPAVX2-NEXT: vpsrld $16, %ymm0, %ymm0 3957; XOPAVX2-NEXT: retq 3958 %shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 2, i32 3, i32 32, i32 32, i32 6, i32 7, i32 32, i32 32, i32 10, i32 11, i32 32, i32 32, i32 14, i32 15, i32 32, i32 32, i32 18, i32 19, i32 32, i32 32, i32 22, i32 23, i32 32, i32 32, i32 26, i32 27, i32 32, i32 32, i32 30, i32 31, i32 32, i32 32> 3959 ret <32 x i8> %shuffle 3960} 3961 3962define <32 x i8> @shuffle_v32i8_07_zz_zz_zz_zz_zz_zz_zz_15_zz_zz_zz_zz_z_zz_zz_23_zz_zz_zz_zz_zz_zz_zz_31_zz_zz_zz_zz_zz_zz_zz(<32 x i8> %a) { 3963; AVX1-LABEL: shuffle_v32i8_07_zz_zz_zz_zz_zz_zz_zz_15_zz_zz_zz_zz_z_zz_zz_23_zz_zz_zz_zz_zz_zz_zz_31_zz_zz_zz_zz_zz_zz_zz: 3964; AVX1: # %bb.0: 3965; AVX1-NEXT: vpsrlq $56, %xmm0, %xmm1 3966; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3967; AVX1-NEXT: vpsrlq $56, %xmm0, 
%xmm0 3968; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3969; AVX1-NEXT: retq 3970; 3971; AVX2OR512VL-LABEL: shuffle_v32i8_07_zz_zz_zz_zz_zz_zz_zz_15_zz_zz_zz_zz_z_zz_zz_23_zz_zz_zz_zz_zz_zz_zz_31_zz_zz_zz_zz_zz_zz_zz: 3972; AVX2OR512VL: # %bb.0: 3973; AVX2OR512VL-NEXT: vpsrlq $56, %ymm0, %ymm0 3974; AVX2OR512VL-NEXT: retq 3975; 3976; XOPAVX1-LABEL: shuffle_v32i8_07_zz_zz_zz_zz_zz_zz_zz_15_zz_zz_zz_zz_z_zz_zz_23_zz_zz_zz_zz_zz_zz_zz_31_zz_zz_zz_zz_zz_zz_zz: 3977; XOPAVX1: # %bb.0: 3978; XOPAVX1-NEXT: vpsrlq $56, %xmm0, %xmm1 3979; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3980; XOPAVX1-NEXT: vpsrlq $56, %xmm0, %xmm0 3981; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3982; XOPAVX1-NEXT: retq 3983; 3984; XOPAVX2-LABEL: shuffle_v32i8_07_zz_zz_zz_zz_zz_zz_zz_15_zz_zz_zz_zz_z_zz_zz_23_zz_zz_zz_zz_zz_zz_zz_31_zz_zz_zz_zz_zz_zz_zz: 3985; XOPAVX2: # %bb.0: 3986; XOPAVX2-NEXT: vpsrlq $56, %ymm0, %ymm0 3987; XOPAVX2-NEXT: retq 3988 %shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 7, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 15, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 23, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32> 3989 ret <32 x i8> %shuffle 3990} 3991 3992define <32 x i8> @shuffle_v32i8_32_zz_zz_zz_zz_zz_zz_zz_33_zz_zz_zz_zz_zz_zz_zz_34_zz_zz_zz_zz_zz_zz_zz_35_zz_zz_zz_zz_zz_zz_zz(<32 x i8> %a) { 3993; AVX1-LABEL: shuffle_v32i8_32_zz_zz_zz_zz_zz_zz_zz_33_zz_zz_zz_zz_zz_zz_zz_34_zz_zz_zz_zz_zz_zz_zz_35_zz_zz_zz_zz_zz_zz_zz: 3994; AVX1: # %bb.0: 3995; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 3996; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0 3997; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 3998; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3999; AVX1-NEXT: retq 4000; 
4001; AVX2OR512VL-LABEL: shuffle_v32i8_32_zz_zz_zz_zz_zz_zz_zz_33_zz_zz_zz_zz_zz_zz_zz_34_zz_zz_zz_zz_zz_zz_zz_35_zz_zz_zz_zz_zz_zz_zz: 4002; AVX2OR512VL: # %bb.0: 4003; AVX2OR512VL-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero 4004; AVX2OR512VL-NEXT: retq 4005; 4006; XOPAVX1-LABEL: shuffle_v32i8_32_zz_zz_zz_zz_zz_zz_zz_33_zz_zz_zz_zz_zz_zz_zz_34_zz_zz_zz_zz_zz_zz_zz_35_zz_zz_zz_zz_zz_zz_zz: 4007; XOPAVX1: # %bb.0: 4008; XOPAVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 4009; XOPAVX1-NEXT: vpsrld $16, %xmm0, %xmm0 4010; XOPAVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 4011; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 4012; XOPAVX1-NEXT: retq 4013; 4014; XOPAVX2-LABEL: shuffle_v32i8_32_zz_zz_zz_zz_zz_zz_zz_33_zz_zz_zz_zz_zz_zz_zz_34_zz_zz_zz_zz_zz_zz_zz_35_zz_zz_zz_zz_zz_zz_zz: 4015; XOPAVX2: # %bb.0: 4016; XOPAVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero 4017; XOPAVX2-NEXT: retq 4018 %shuffle = shufflevector <32 x i8> zeroinitializer, <32 x i8> %a, <32 x i32> <i32 32, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 33, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 34, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 35, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 4019 ret <32 x i8> %shuffle 4020} 4021 4022define <32 x i8> @shuffle_v32i8_32_zz_zz_zz_33_zz_zz_zz_34_zz_zz_zz_35_zz_zz_zz_36_zz_zz_zz_37_zz_zz_zz_38_zz_zz_zz_39_zz_zz_zz(<32 x i8> %a) { 4023; AVX1-LABEL: shuffle_v32i8_32_zz_zz_zz_33_zz_zz_zz_34_zz_zz_zz_35_zz_zz_zz_36_zz_zz_zz_37_zz_zz_zz_38_zz_zz_zz_39_zz_zz_zz: 
4024; AVX1: # %bb.0: 4025; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 4026; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 4027; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 4028; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 4029; AVX1-NEXT: retq 4030; 4031; AVX2OR512VL-LABEL: shuffle_v32i8_32_zz_zz_zz_33_zz_zz_zz_34_zz_zz_zz_35_zz_zz_zz_36_zz_zz_zz_37_zz_zz_zz_38_zz_zz_zz_39_zz_zz_zz: 4032; AVX2OR512VL: # %bb.0: 4033; AVX2OR512VL-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 4034; AVX2OR512VL-NEXT: retq 4035; 4036; XOPAVX1-LABEL: shuffle_v32i8_32_zz_zz_zz_33_zz_zz_zz_34_zz_zz_zz_35_zz_zz_zz_36_zz_zz_zz_37_zz_zz_zz_38_zz_zz_zz_39_zz_zz_zz: 4037; XOPAVX1: # %bb.0: 4038; XOPAVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 4039; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 4040; XOPAVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 4041; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 4042; XOPAVX1-NEXT: retq 4043; 4044; XOPAVX2-LABEL: shuffle_v32i8_32_zz_zz_zz_33_zz_zz_zz_34_zz_zz_zz_35_zz_zz_zz_36_zz_zz_zz_37_zz_zz_zz_38_zz_zz_zz_39_zz_zz_zz: 4045; XOPAVX2: # %bb.0: 4046; XOPAVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 4047; XOPAVX2-NEXT: retq 4048 %shuffle = shufflevector <32 x i8> zeroinitializer, <32 x i8> %a, <32 x i32> <i32 32, i32 0, i32 0, i32 0, i32 33, i32 0, i32 0, i32 0, i32 34, 
i32 0, i32 0, i32 0, i32 35, i32 0, i32 0, i32 0, i32 36, i32 0, i32 0, i32 0, i32 37, i32 0, i32 0, i32 0, i32 38, i32 0, i32 0, i32 0, i32 39, i32 0, i32 0, i32 0> 4049 ret <32 x i8> %shuffle 4050} 4051 4052define <32 x i8> @shuffle_v32i8_32_zz_33_zz_34_zz_35_zz_36_zz_37_zz_38_zz_39_zz_40_zz_41_zz_42_zz_43_zz_44_zz_45_zz_46_zz_47_zz(<32 x i8> %a) { 4053; AVX1-LABEL: shuffle_v32i8_32_zz_33_zz_34_zz_35_zz_36_zz_37_zz_38_zz_39_zz_40_zz_41_zz_42_zz_43_zz_44_zz_45_zz_46_zz_47_zz: 4054; AVX1: # %bb.0: 4055; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 4056; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] 4057; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 4058; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 4059; AVX1-NEXT: retq 4060; 4061; AVX2OR512VL-LABEL: shuffle_v32i8_32_zz_33_zz_34_zz_35_zz_36_zz_37_zz_38_zz_39_zz_40_zz_41_zz_42_zz_43_zz_44_zz_45_zz_46_zz_47_zz: 4062; AVX2OR512VL: # %bb.0: 4063; AVX2OR512VL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 4064; AVX2OR512VL-NEXT: retq 4065; 4066; XOPAVX1-LABEL: shuffle_v32i8_32_zz_33_zz_34_zz_35_zz_36_zz_37_zz_38_zz_39_zz_40_zz_41_zz_42_zz_43_zz_44_zz_45_zz_46_zz_47_zz: 4067; XOPAVX1: # %bb.0: 4068; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 4069; XOPAVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] 4070; XOPAVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 4071; XOPAVX1-NEXT: vinsertf128 $1, 
%xmm1, %ymm0, %ymm0 4072; XOPAVX1-NEXT: retq 4073; 4074; XOPAVX2-LABEL: shuffle_v32i8_32_zz_33_zz_34_zz_35_zz_36_zz_37_zz_38_zz_39_zz_40_zz_41_zz_42_zz_43_zz_44_zz_45_zz_46_zz_47_zz: 4075; XOPAVX2: # %bb.0: 4076; XOPAVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 4077; XOPAVX2-NEXT: retq 4078 %shuffle = shufflevector <32 x i8> zeroinitializer, <32 x i8> %a, <32 x i32> <i32 32, i32 0, i32 33, i32 0, i32 34, i32 0, i32 35, i32 0, i32 36, i32 0, i32 37, i32 0, i32 38, i32 0, i32 39, i32 0, i32 40, i32 0, i32 41, i32 0, i32 42, i32 0, i32 43, i32 0, i32 44, i32 0, i32 45, i32 0, i32 46, i32 0, i32 47, i32 0> 4079 ret <32 x i8> %shuffle 4080} 4081
; Zero-extension test: mask indices 56..63 pick bytes 24..31 of %a (the second
; shufflevector operand) into every fourth result byte; all remaining indices
; are < 32 and so read the zeroinitializer first operand.
; NOTE(review): the function name spells 33 elements because of a stray "_zz"
; ("58_zz_zz_zz__zz_59"), while the mask has 32. Left unchanged here since the
; generated label checks repeat the name verbatim.
4082define <32 x i8> @shuffle_v32i8_56_zz_zz_zz_57_zz_zz_zz_58_zz_zz_zz__zz_59_zz_zz_zz_60_zz_zz_zz_61_zz_zz_zz_62_zz_zz_zz_63_zz_zz_zz(<32 x i8> %a) { 4083; AVX1-LABEL: shuffle_v32i8_56_zz_zz_zz_57_zz_zz_zz_58_zz_zz_zz__zz_59_zz_zz_zz_60_zz_zz_zz_61_zz_zz_zz_62_zz_zz_zz_63_zz_zz_zz: 4084; AVX1: # %bb.0: 4085; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 4086; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 4087; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero 4088; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] 4089; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 4090; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 4091; AVX1-NEXT: retq 4092; 4093; AVX2-LABEL: shuffle_v32i8_56_zz_zz_zz_57_zz_zz_zz_58_zz_zz_zz__zz_59_zz_zz_zz_60_zz_zz_zz_61_zz_zz_zz_62_zz_zz_zz_63_zz_zz_zz: 4094; AVX2: # %bb.0: 4095; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,3,3,3] 4096; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[8],zero,zero,zero,ymm0[9],zero,zero,zero,ymm0[10],zero,zero,zero,ymm0[11],zero,zero,zero,ymm0[28],zero,zero,zero,ymm0[29],zero,zero,zero,ymm0[30],zero,zero,zero,ymm0[31],zero,zero,zero 4097; AVX2-NEXT: retq 4098; 4099; AVX512VLBW-LABEL: shuffle_v32i8_56_zz_zz_zz_57_zz_zz_zz_58_zz_zz_zz__zz_59_zz_zz_zz_60_zz_zz_zz_61_zz_zz_zz_62_zz_zz_zz_63_zz_zz_zz: 4100; AVX512VLBW: # %bb.0: 4101; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,3,3,3] 4102; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8],zero,zero,zero,ymm0[9],zero,zero,zero,ymm0[10],zero,zero,zero,ymm0[11],zero,zero,zero,ymm0[28],zero,zero,zero,ymm0[29],zero,zero,zero,ymm0[30],zero,zero,zero,ymm0[31],zero,zero,zero 4103; AVX512VLBW-NEXT: retq 4104; 4105; AVX512VLVBMI-LABEL: shuffle_v32i8_56_zz_zz_zz_57_zz_zz_zz_58_zz_zz_zz__zz_59_zz_zz_zz_60_zz_zz_zz_61_zz_zz_zz_62_zz_zz_zz_63_zz_zz_zz: 4106; AVX512VLVBMI: # %bb.0: 4107; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} ymm2 = [56,1,2,3,57,5,6,7,58,9,10,11,59,13,14,15,60,17,18,19,61,21,22,23,62,25,26,27,63,29,30,31] 4108; AVX512VLVBMI-NEXT: vpxor %xmm1, %xmm1, %xmm1 4109; AVX512VLVBMI-NEXT: vpermt2b %ymm0, %ymm2, %ymm1 4110; AVX512VLVBMI-NEXT: vmovdqa %ymm1, %ymm0 4111; AVX512VLVBMI-NEXT: retq 4112; 4113; XOPAVX1-LABEL: shuffle_v32i8_56_zz_zz_zz_57_zz_zz_zz_58_zz_zz_zz__zz_59_zz_zz_zz_60_zz_zz_zz_61_zz_zz_zz_62_zz_zz_zz_63_zz_zz_zz: 4114; XOPAVX1: # %bb.0: 4115; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 4116; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 4117; XOPAVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero 4118; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] 4119; XOPAVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 4120; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 4121; XOPAVX1-NEXT: retq 4122; 4123; XOPAVX2-LABEL: 
shuffle_v32i8_56_zz_zz_zz_57_zz_zz_zz_58_zz_zz_zz__zz_59_zz_zz_zz_60_zz_zz_zz_61_zz_zz_zz_62_zz_zz_zz_63_zz_zz_zz: 4124; XOPAVX2: # %bb.0: 4125; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,3,3,3] 4126; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8],zero,zero,zero,ymm0[9],zero,zero,zero,ymm0[10],zero,zero,zero,ymm0[11],zero,zero,zero,ymm0[28],zero,zero,zero,ymm0[29],zero,zero,zero,ymm0[30],zero,zero,zero,ymm0[31],zero,zero,zero 4127; XOPAVX2-NEXT: retq 4128 %shuffle = shufflevector <32 x i8> zeroinitializer, <32 x i8> %a, <32 x i32> <i32 56, i32 1, i32 2, i32 3, i32 57, i32 5, i32 6, i32 7, i32 58, i32 9, i32 10, i32 11, i32 59, i32 13, i32 14, i32 15, i32 60, i32 17, i32 18, i32 19, i32 61, i32 21, i32 22, i32 23, i32 62, i32 25, i32 26, i32 27, i32 63, i32 29, i32 30, i32 31> 4129 ret <32 x i8> %shuffle 4130} 4131 4132; PR121823 4133define <32 x i8> @shuffle_v32i8_01_09_00_03_11_02_05_13_04_07_15_06_17_25_16_19_27_18_21_29_20_23_31_22_zz_zz_zz_zz_zz_zz_zz_zz(<32 x i8> %a) { 4134; AVX1-LABEL: shuffle_v32i8_01_09_00_03_11_02_05_13_04_07_15_06_17_25_16_19_27_18_21_29_20_23_31_22_zz_zz_zz_zz_zz_zz_zz_zz: 4135; AVX1: # %bb.0: 4136; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 4137; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[1,9,0,3] 4138; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,9,0,3,11,2,5,13,4,7,15,6],zero,zero,zero,zero 4139; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 4140; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[11,2,5,13,4,7,15,6],zero,zero,zero,zero,zero,zero,zero,zero 4141; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 4142; AVX1-NEXT: retq 4143; 4144; AVX2-LABEL: shuffle_v32i8_01_09_00_03_11_02_05_13_04_07_15_06_17_25_16_19_27_18_21_29_20_23_31_22_zz_zz_zz_zz_zz_zz_zz_zz: 4145; AVX2: # %bb.0: 4146; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,9,0,3,11,2,5,13,4,7,15,6,u,u,u,u,17,25,16,19,27,18,21,29,20,23,31,22,u,u,u,u] 4147; AVX2-NEXT: vpmovsxbd {{.*#+}} ymm1 = [0,1,2,4,5,6,0,0] 4148; AVX2-NEXT: vpermd %ymm0, %ymm1, 
%ymm0 4149; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 4150; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7] 4151; AVX2-NEXT: retq 4152; 4153; AVX512VLBW-LABEL: shuffle_v32i8_01_09_00_03_11_02_05_13_04_07_15_06_17_25_16_19_27_18_21_29_20_23_31_22_zz_zz_zz_zz_zz_zz_zz_zz: 4154; AVX512VLBW: # %bb.0: 4155; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[1,9,0,3,11,2,5,13,4,7,15,6,u,u,u,u,17,25,16,19,27,18,21,29,20,23,31,22,u,u,u,u] 4156; AVX512VLBW-NEXT: vpxor %xmm2, %xmm2, %xmm2 4157; AVX512VLBW-NEXT: vpmovsxbd {{.*#+}} ymm0 = [0,1,2,4,5,6,14,15] 4158; AVX512VLBW-NEXT: vpermi2d %ymm2, %ymm1, %ymm0 4159; AVX512VLBW-NEXT: retq 4160; 4161; AVX512VLVBMI-LABEL: shuffle_v32i8_01_09_00_03_11_02_05_13_04_07_15_06_17_25_16_19_27_18_21_29_20_23_31_22_zz_zz_zz_zz_zz_zz_zz_zz: 4162; AVX512VLVBMI: # %bb.0: 4163; AVX512VLVBMI-NEXT: vpxor %xmm1, %xmm1, %xmm1 4164; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} ymm2 = [1,9,0,3,11,2,5,13,4,7,15,6,17,25,16,19,27,18,21,29,20,23,31,22,56,57,58,59,60,61,62,63] 4165; AVX512VLVBMI-NEXT: vpermt2b %ymm1, %ymm2, %ymm0 4166; AVX512VLVBMI-NEXT: retq 4167; 4168; XOPAVX1-LABEL: shuffle_v32i8_01_09_00_03_11_02_05_13_04_07_15_06_17_25_16_19_27_18_21_29_20_23_31_22_zz_zz_zz_zz_zz_zz_zz_zz: 4169; XOPAVX1: # %bb.0: 4170; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 4171; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[1,9,0,3,11,2,5,13,4,7,15,6],xmm1[1,9,0,3] 4172; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[11,2,5,13,4,7,15,6],zero,zero,zero,zero,zero,zero,zero,zero 4173; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 4174; XOPAVX1-NEXT: retq 4175; 4176; XOPAVX2-LABEL: shuffle_v32i8_01_09_00_03_11_02_05_13_04_07_15_06_17_25_16_19_27_18_21_29_20_23_31_22_zz_zz_zz_zz_zz_zz_zz_zz: 4177; XOPAVX2: # %bb.0: 4178; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,9,0,3,11,2,5,13,4,7,15,6,u,u,u,u,17,25,16,19,27,18,21,29,20,23,31,22,u,u,u,u] 4179; XOPAVX2-NEXT: vpmovsxbd {{.*#+}} ymm1 = [0,1,2,4,5,6,0,0] 4180; XOPAVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 4181; XOPAVX2-NEXT: 
vpxor %xmm1, %xmm1, %xmm1 4182; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7] 4183; XOPAVX2-NEXT: retq 4184 %r = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 1, i32 9, i32 0, i32 3, i32 11, i32 2, i32 5, i32 13, i32 4, i32 7, i32 15, i32 6, i32 17, i32 25, i32 16, i32 19, i32 27, i32 18, i32 21, i32 29, i32 20, i32 23, i32 31, i32 22, i32 32, i32 32, i32 32, i32 32, i32 48, i32 48, i32 48, i32 48> 4185 ret <32 x i8> %r 4186} 4187 4188define <32 x i8> @shuffle_v32i8_47_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30(<32 x i8> %a, <32 x i8> %b) { 4189; AVX1-LABEL: shuffle_v32i8_47_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30: 4190; AVX1: # %bb.0: 4191; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 4192; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 4193; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[15],xmm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 4194; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 4195; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 4196; AVX1-NEXT: retq 4197; 4198; AVX2OR512VL-LABEL: shuffle_v32i8_47_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30: 4199; AVX2OR512VL: # %bb.0: 4200; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30] 4201; AVX2OR512VL-NEXT: retq 4202; 4203; XOPAVX1-LABEL: shuffle_v32i8_47_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30: 4204; XOPAVX1: # %bb.0: 4205; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 4206; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 4207; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[15],xmm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 4208; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 4209; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, 
%ymm0, %ymm0 4210; XOPAVX1-NEXT: retq 4211; 4212; XOPAVX2-LABEL: shuffle_v32i8_47_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30: 4213; XOPAVX2: # %bb.0: 4214; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30] 4215; XOPAVX2-NEXT: retq 4216 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 47, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 63, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30> 4217 ret <32 x i8> %shuffle 4218} 4219 4220define <32 x i8> @shuffle_v32i8_uu_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30(<32 x i8> %a, <32 x i8> %b) { 4221; AVX1-LABEL: shuffle_v32i8_uu_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30: 4222; AVX1: # %bb.0: 4223; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 4224; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 4225; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[15],xmm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 4226; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 4227; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 4228; AVX1-NEXT: retq 4229; 4230; AVX2OR512VL-LABEL: shuffle_v32i8_uu_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30: 4231; AVX2OR512VL: # %bb.0: 4232; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30] 4233; AVX2OR512VL-NEXT: retq 4234; 4235; XOPAVX1-LABEL: shuffle_v32i8_uu_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30: 4236; XOPAVX1: # %bb.0: 4237; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 4238; XOPAVX1-NEXT: vextractf128 
$1, %ymm0, %xmm2 4239; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[15],xmm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 4240; XOPAVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 4241; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 4242; XOPAVX1-NEXT: retq 4243; 4244; XOPAVX2-LABEL: shuffle_v32i8_uu_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30: 4245; XOPAVX2: # %bb.0: 4246; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30] 4247; XOPAVX2-NEXT: retq 4248 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 poison, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 63, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30> 4249 ret <32 x i8> %shuffle 4250} 4251 4252define <32 x i8> @shuffle_v32i8_47_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_uu_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30(<32 x i8> %a, <32 x i8> %b) { 4253; AVX1-LABEL: shuffle_v32i8_47_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_uu_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30: 4254; AVX1: # %bb.0: 4255; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 4256; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 4257; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 4258; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 4259; AVX1-NEXT: retq 4260; 4261; AVX2OR512VL-LABEL: shuffle_v32i8_47_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_uu_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30: 4262; AVX2OR512VL: # %bb.0: 4263; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30] 4264; AVX2OR512VL-NEXT: retq 4265; 4266; XOPAVX1-LABEL: 
shuffle_v32i8_47_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_uu_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30: 4267; XOPAVX1: # %bb.0: 4268; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 4269; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 4270; XOPAVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 4271; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 4272; XOPAVX1-NEXT: retq 4273; 4274; XOPAVX2-LABEL: shuffle_v32i8_47_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_uu_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30: 4275; XOPAVX2: # %bb.0: 4276; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30] 4277; XOPAVX2-NEXT: retq 4278 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 47, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 poison, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30> 4279 ret <32 x i8> %shuffle 4280} 4281 4282define <32 x i8> @shuffle_v32i8_uu_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<32 x i8> %a, <32 x i8> %b) { 4283; AVX1-LABEL: shuffle_v32i8_uu_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu: 4284; AVX1: # %bb.0: 4285; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 4286; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 4287; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 4288; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 4289; AVX1-NEXT: retq 4290; 4291; AVX2OR512VL-LABEL: shuffle_v32i8_uu_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu: 4292; AVX2OR512VL: # %bb.0: 4293; AVX2OR512VL-NEXT: 
vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30] 4294; AVX2OR512VL-NEXT: retq 4295; 4296; XOPAVX1-LABEL: shuffle_v32i8_uu_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu: 4297; XOPAVX1: # %bb.0: 4298; XOPAVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 4299; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 4300; XOPAVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 4301; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 4302; XOPAVX1-NEXT: retq 4303; 4304; XOPAVX2-LABEL: shuffle_v32i8_uu_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu: 4305; XOPAVX2: # %bb.0: 4306; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30] 4307; XOPAVX2-NEXT: retq 4308 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 poison, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 63, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 4309 ret <32 x i8> %shuffle 4310} 4311 4312define <32 x i8> @shuffle_v32i8_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30(<32 x i8> %a, <32 x i8> %b) { 4313; AVX1-LABEL: shuffle_v32i8_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30: 4314; AVX1: # %bb.0: 4315; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 4316; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 4317; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 4318; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 4319; 
AVX1-NEXT: retq 4320; 4321; AVX2OR512VL-LABEL: shuffle_v32i8_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30: 4322; AVX2OR512VL: # %bb.0: 4323; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30] 4324; AVX2OR512VL-NEXT: retq 4325; 4326; XOPAVX1-LABEL: shuffle_v32i8_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30: 4327; XOPAVX1: # %bb.0: 4328; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 4329; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 4330; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 4331; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 4332; XOPAVX1-NEXT: retq 4333; 4334; XOPAVX2-LABEL: shuffle_v32i8_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30: 4335; XOPAVX2: # %bb.0: 4336; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30] 4337; XOPAVX2-NEXT: retq 4338 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 63, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30> 4339 ret <32 x i8> %shuffle 4340} 4341 4342define <32 x i8> @shuffle_v32i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_32_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_48(<32 x i8> %a, <32 x i8> %b) { 4343; AVX1-LABEL: shuffle_v32i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_32_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_48: 4344; AVX1: # %bb.0: 4345; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 4346; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 4347; 
AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm3[0] 4348; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0] 4349; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 4350; AVX1-NEXT: retq 4351; 4352; AVX2OR512VL-LABEL: shuffle_v32i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_32_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_48: 4353; AVX2OR512VL: # %bb.0: 4354; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0],ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16] 4355; AVX2OR512VL-NEXT: retq 4356; 4357; XOPAVX1-LABEL: shuffle_v32i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_32_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_48: 4358; XOPAVX1: # %bb.0: 4359; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 4360; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 4361; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm3[0] 4362; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0] 4363; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 4364; XOPAVX1-NEXT: retq 4365; 4366; XOPAVX2-LABEL: shuffle_v32i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_32_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_48: 4367; XOPAVX2: # %bb.0: 4368; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0],ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16] 4369; XOPAVX2-NEXT: retq 4370 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48> 4371 ret <32 x i8> %shuffle 4372} 4373
; Two-input per-lane byte rotate: each 128-bit half of the result is bytes
; 1..15 of the matching half of %b (mask indices 33..47 / 49..63) followed by
; byte 0 of the matching half of %a (mask indices 0 / 16). The checks below
; expect a vpalignr on every configuration.
4374define <32 x i8> @shuffle_v32i8_33_34_35_36_37_38_39_40_41_42_43_44_45_46_47_00_49_50_51_52_53_54_55_56_57_58_59_60_61_62_63_16(<32 x i8> %a, <32 x i8> 
%b) { 4375; AVX1-LABEL: shuffle_v32i8_33_34_35_36_37_38_39_40_41_42_43_44_45_46_47_00_49_50_51_52_53_54_55_56_57_58_59_60_61_62_63_16: 4376; AVX1: # %bb.0: 4377; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 4378; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 4379; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm3[0] 4380; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0] 4381; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 4382; AVX1-NEXT: retq 4383; 4384; AVX2OR512VL-LABEL: shuffle_v32i8_33_34_35_36_37_38_39_40_41_42_43_44_45_46_47_00_49_50_51_52_53_54_55_56_57_58_59_60_61_62_63_16: 4385; AVX2OR512VL: # %bb.0: 4386; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] 4387; AVX2OR512VL-NEXT: retq 4388; 4389; XOPAVX1-LABEL: shuffle_v32i8_33_34_35_36_37_38_39_40_41_42_43_44_45_46_47_00_49_50_51_52_53_54_55_56_57_58_59_60_61_62_63_16: 4390; XOPAVX1: # %bb.0: 4391; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 4392; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 4393; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm3[0] 4394; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0] 4395; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 4396; XOPAVX1-NEXT: retq 4397; 4398; XOPAVX2-LABEL: shuffle_v32i8_33_34_35_36_37_38_39_40_41_42_43_44_45_46_47_00_49_50_51_52_53_54_55_56_57_58_59_60_61_62_63_16: 4399; XOPAVX2: # %bb.0: 4400; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] 4401; XOPAVX2-NEXT: retq 4402 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 0, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 
56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 16> 4403 ret <32 x i8> %shuffle 4404} 4405 4406define <32 x i8> @shuffle_v32i8_15_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_31_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62(<32 x i8> %a, <32 x i8> %b) { 4407; AVX1-LABEL: shuffle_v32i8_15_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_31_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62: 4408; AVX1: # %bb.0: 4409; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 4410; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 4411; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[15],xmm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 4412; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 4413; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 4414; AVX1-NEXT: retq 4415; 4416; AVX2OR512VL-LABEL: shuffle_v32i8_15_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_31_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62: 4417; AVX2OR512VL: # %bb.0: 4418; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm0[31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30] 4419; AVX2OR512VL-NEXT: retq 4420; 4421; XOPAVX1-LABEL: shuffle_v32i8_15_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_31_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62: 4422; XOPAVX1: # %bb.0: 4423; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 4424; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 4425; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[15],xmm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 4426; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 4427; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 4428; XOPAVX1-NEXT: retq 4429; 4430; XOPAVX2-LABEL: shuffle_v32i8_15_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_31_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62: 4431; XOPAVX2: # %bb.0: 4432; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm0[31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30] 4433; XOPAVX2-NEXT: 
retq 4434 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 31, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62> 4435 ret <32 x i8> %shuffle 4436} 4437 4438define <32 x i8> @shuffle_v32i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_16(<32 x i8> %a, <32 x i8> %b) { 4439; AVX1-LABEL: shuffle_v32i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_16: 4440; AVX1: # %bb.0: 4441; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0] 4442; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 4443; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0] 4444; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 4445; AVX1-NEXT: retq 4446; 4447; AVX2OR512VL-LABEL: shuffle_v32i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_16: 4448; AVX2OR512VL: # %bb.0: 4449; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,16] 4450; AVX2OR512VL-NEXT: retq 4451; 4452; XOPAVX1-LABEL: shuffle_v32i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_16: 4453; XOPAVX1: # %bb.0: 4454; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0] 4455; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 4456; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0] 4457; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 4458; XOPAVX1-NEXT: retq 4459; 4460; XOPAVX2-LABEL: shuffle_v32i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_16: 4461; XOPAVX2: # %bb.0: 4462; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = 
ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,16] 4463; XOPAVX2-NEXT: retq 4464 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16> 4465 ret <32 x i8> %shuffle 4466} 4467 4468define <32 x i8> @shuffle_v32i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30(<32 x i8> %a, <32 x i8> %b) { 4469; AVX1-LABEL: shuffle_v32i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30: 4470; AVX1: # %bb.0: 4471; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 4472; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 4473; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 4474; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 4475; AVX1-NEXT: retq 4476; 4477; AVX2OR512VL-LABEL: shuffle_v32i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30: 4478; AVX2OR512VL: # %bb.0: 4479; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,31,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30] 4480; AVX2OR512VL-NEXT: retq 4481; 4482; XOPAVX1-LABEL: shuffle_v32i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30: 4483; XOPAVX1: # %bb.0: 4484; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 4485; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 4486; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 4487; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 4488; XOPAVX1-NEXT: retq 4489; 4490; XOPAVX2-LABEL: 
shuffle_v32i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30: 4491; XOPAVX2: # %bb.0: 4492; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,31,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30] 4493; XOPAVX2-NEXT: retq 4494 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30> 4495 ret <32 x i8> %shuffle 4496} 4497 4498; PR33740 4499define <32 x i8> @shuffle_v32i8_00_01_16_17_02_03_18_19_04_05_20_21_06_07_22_23_08_09_24_25_10_11_26_27_12_13_28_29_14_15_30_31(<32 x i8> %a, <32 x i8> %b) { 4500; AVX1-LABEL: shuffle_v32i8_00_01_16_17_02_03_18_19_04_05_20_21_06_07_22_23_08_09_24_25_10_11_26_27_12_13_28_29_14_15_30_31: 4501; AVX1: # %bb.0: 4502; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 4503; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 4504; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 4505; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 4506; AVX1-NEXT: retq 4507; 4508; AVX2-LABEL: shuffle_v32i8_00_01_16_17_02_03_18_19_04_05_20_21_06_07_22_23_08_09_24_25_10_11_26_27_12_13_28_29_14_15_30_31: 4509; AVX2: # %bb.0: 4510; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 4511; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15,16,17,24,25,18,19,26,27,20,21,28,29,22,23,30,31] 4512; AVX2-NEXT: retq 4513; 4514; AVX512VL-LABEL: shuffle_v32i8_00_01_16_17_02_03_18_19_04_05_20_21_06_07_22_23_08_09_24_25_10_11_26_27_12_13_28_29_14_15_30_31: 4515; AVX512VL: # %bb.0: 4516; AVX512VL-NEXT: vpmovsxbw {{.*#+}} ymm1 = [0,8,1,9,2,10,3,11,4,12,5,13,6,14,7,15] 4517; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0 4518; 
AVX512VL-NEXT: retq 4519; 4520; XOPAVX1-LABEL: shuffle_v32i8_00_01_16_17_02_03_18_19_04_05_20_21_06_07_22_23_08_09_24_25_10_11_26_27_12_13_28_29_14_15_30_31: 4521; XOPAVX1: # %bb.0: 4522; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 4523; XOPAVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 4524; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 4525; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 4526; XOPAVX1-NEXT: retq 4527; 4528; XOPAVX2-LABEL: shuffle_v32i8_00_01_16_17_02_03_18_19_04_05_20_21_06_07_22_23_08_09_24_25_10_11_26_27_12_13_28_29_14_15_30_31: 4529; XOPAVX2: # %bb.0: 4530; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 4531; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15,16,17,24,25,18,19,26,27,20,21,28,29,22,23,30,31] 4532; XOPAVX2-NEXT: retq 4533 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 1, i32 16, i32 17, i32 2, i32 3, i32 18, i32 19, i32 4, i32 5, i32 20, i32 21, i32 6, i32 7, i32 22, i32 23, i32 8, i32 9, i32 24, i32 25, i32 10, i32 11, i32 26, i32 27, i32 12, i32 13, i32 28, i32 29, i32 14, i32 15, i32 30, i32 31> 4534 ret <32 x i8> %shuffle 4535} 4536 4537define <32 x i8> @shuffle_v32i8_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_10_10_10_10_10_10_10_10_10_10_10_10_10_10_10_10(<32 x i8> %a, <32 x i8> %b) { 4538; AVX1-LABEL: shuffle_v32i8_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_10_10_10_10_10_10_10_10_10_10_10_10_10_10_10_10: 4539; AVX1: # %bb.0: 4540; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10] 4541; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 4542; AVX1-NEXT: retq 4543; 4544; AVX2OR512VL-LABEL: shuffle_v32i8_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_10_10_10_10_10_10_10_10_10_10_10_10_10_10_10_10: 4545; AVX2OR512VL: # %bb.0: 4546; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = 
xmm0[10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10] 4547; AVX2OR512VL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 4548; AVX2OR512VL-NEXT: retq 4549; 4550; XOPAVX1-LABEL: shuffle_v32i8_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_10_10_10_10_10_10_10_10_10_10_10_10_10_10_10_10: 4551; XOPAVX1: # %bb.0: 4552; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10] 4553; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 4554; XOPAVX1-NEXT: retq 4555; 4556; XOPAVX2-LABEL: shuffle_v32i8_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_10_10_10_10_10_10_10_10_10_10_10_10_10_10_10_10: 4557; XOPAVX2: # %bb.0: 4558; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10] 4559; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 4560; XOPAVX2-NEXT: retq 4561 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10> 4562 ret <32 x i8> %shuffle 4563} 4564 4565define <32 x i8> @shuffle_v32i8_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) { 4566; AVX1-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16: 4567; AVX1: # %bb.0: 4568; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 4569; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 4570; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 4571; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 4572; AVX1-NEXT: retq 4573; 4574; AVX2-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16: 4575; AVX2: # %bb.0: 4576; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 4577; AVX2-NEXT: 
vpbroadcastb %xmm0, %ymm0 4578; AVX2-NEXT: retq 4579; 4580; AVX512VLBW-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16: 4581; AVX512VLBW: # %bb.0: 4582; AVX512VLBW-NEXT: vextracti128 $1, %ymm0, %xmm0 4583; AVX512VLBW-NEXT: vpbroadcastb %xmm0, %ymm0 4584; AVX512VLBW-NEXT: retq 4585; 4586; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16: 4587; AVX512VLVBMI-SLOW: # %bb.0: 4588; AVX512VLVBMI-SLOW-NEXT: vextracti128 $1, %ymm0, %xmm0 4589; AVX512VLVBMI-SLOW-NEXT: vpbroadcastb %xmm0, %ymm0 4590; AVX512VLVBMI-SLOW-NEXT: retq 4591; 4592; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16: 4593; AVX512VLVBMI-FAST-ALL: # %bb.0: 4594; AVX512VLVBMI-FAST-ALL-NEXT: vpbroadcastb {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] 4595; AVX512VLVBMI-FAST-ALL-NEXT: vpermb %ymm0, %ymm1, %ymm0 4596; AVX512VLVBMI-FAST-ALL-NEXT: retq 4597; 4598; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16: 4599; AVX512VLVBMI-FAST-PERLANE: # %bb.0: 4600; AVX512VLVBMI-FAST-PERLANE-NEXT: vextracti128 $1, %ymm0, %xmm0 4601; AVX512VLVBMI-FAST-PERLANE-NEXT: vpbroadcastb %xmm0, %ymm0 4602; AVX512VLVBMI-FAST-PERLANE-NEXT: retq 4603; 4604; XOPAVX1-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16: 4605; XOPAVX1: # %bb.0: 4606; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 4607; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 4608; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 4609; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 4610; XOPAVX1-NEXT: retq 4611; 4612; XOPAVX2-LABEL: 
shuffle_v32i8_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16: 4613; XOPAVX2: # %bb.0: 4614; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 4615; XOPAVX2-NEXT: vpbroadcastb %xmm0, %ymm0 4616; XOPAVX2-NEXT: retq 4617 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16> 4618 ret <32 x i8> %shuffle 4619} 4620 4621define <32 x i8> @shuffle_v32i8_15_15_15_15_15_15_15_15_32_32_32_32_32_32_32_32_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<32 x i8> %a, <32 x i8> %b) { 4622; AVX1-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_32_32_32_32_32_32_32_32_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu: 4623; AVX1: # %bb.0: 4624; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,15,15,15,15,15,15,15,u,u,u,u,u,u,u,u] 4625; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 4626; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] 4627; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 4628; AVX1-NEXT: retq 4629; 4630; AVX2-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_32_32_32_32_32_32_32_32_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu: 4631; AVX2: # %bb.0: 4632; AVX2-NEXT: vpbroadcastb %xmm1, %xmm1 4633; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,15,15,15,15,15,15,15,u,u,u,u,u,u,u,u] 4634; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 4635; AVX2-NEXT: retq 4636; 4637; AVX512VLBW-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_32_32_32_32_32_32_32_32_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu: 4638; AVX512VLBW: # %bb.0: 4639; AVX512VLBW-NEXT: vpbroadcastb %xmm1, %xmm1 4640; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,15,15,15,15,15,15,15,u,u,u,u,u,u,u,u] 4641; AVX512VLBW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 
; AVX512VLBW-NEXT:    retq
;
; AVX512VLVBMI-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_32_32_32_32_32_32_32_32_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
; AVX512VLVBMI:       # %bb.0:
; AVX512VLVBMI-NEXT:    vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,16,16,16,16,16,16,16,16]
; AVX512VLVBMI-NEXT:    vpermt2b %xmm1, %xmm2, %xmm0
; AVX512VLVBMI-NEXT:    retq
;
; XOP-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_32_32_32_32_32_32_32_32_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
; XOP:       # %bb.0:
; XOP-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[15,15,15,15,15,15,15,15],xmm1[0,0,0,0,0,0,0,0]
; XOP-NEXT:    retq
  ; Result bytes 0-7 repeat byte 15 of %a, bytes 8-15 repeat byte 0 of %b
  ; (mask index 32); the upper 16 result bytes are poison.
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  ret <32 x i8> %shuffle
}

; Splat byte 15 of %a across the low 16 result bytes; the upper 16 result
; bytes are poison. Every RUN configuration shares the same expected output.
define <32 x i8> @shuffle_v32i8_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<32 x i8> %a, <32 x i8> %b) {
; ALL-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
; ALL:       # %bb.0:
; ALL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; ALL-NEXT:    retq
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 4668 ret <32 x i8> %shuffle 4669} 4670 4671define <32 x i8> @shuffle_v32i8_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<32 x i8> %a, <32 x i8> %b) { 4672; AVX1-LABEL: shuffle_v32i8_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu: 4673; AVX1: # %bb.0: 4674; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 4675; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6] 4676; AVX1-NEXT: retq 4677; 4678; AVX2OR512VL-LABEL: shuffle_v32i8_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu: 4679; AVX2OR512VL: # %bb.0: 4680; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm0 4681; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6] 4682; AVX2OR512VL-NEXT: retq 4683; 4684; XOPAVX1-LABEL: shuffle_v32i8_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu: 4685; XOPAVX1: # %bb.0: 4686; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 4687; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6] 4688; XOPAVX1-NEXT: retq 4689; 4690; XOPAVX2-LABEL: shuffle_v32i8_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu: 4691; XOPAVX2: # %bb.0: 4692; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 4693; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6] 4694; XOPAVX2-NEXT: retq 4695 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison> 4696 ret <32 x i8> %shuffle 4697} 4698 4699; PR36933 4700define <32 x i8> @shuffle_v32i8_31_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_47_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62(<32 x i8> %a0, <32 x i8> %a1) { 4701; AVX1-LABEL: shuffle_v32i8_31_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_47_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62: 4702; AVX1: # %bb.0: 4703; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 4704; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm1[15],xmm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 4705; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 4706; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 4707; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 4708; AVX1-NEXT: retq 4709; 4710; AVX2-LABEL: shuffle_v32i8_31_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_47_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62: 4711; AVX2: # %bb.0: 4712; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] 4713; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm0[31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30] 4714; AVX2-NEXT: retq 4715; 4716; AVX512VLBW-LABEL: shuffle_v32i8_31_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_47_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62: 4717; AVX512VLBW: # %bb.0: 4718; AVX512VLBW-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] 4719; AVX512VLBW-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm0[31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30] 4720; AVX512VLBW-NEXT: retq 4721; 4722; AVX512VLVBMI-LABEL: shuffle_v32i8_31_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_47_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62: 4723; AVX512VLVBMI: # %bb.0: 4724; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} ymm2 = [63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30] 4725; AVX512VLVBMI-NEXT: vpermi2b %ymm0, %ymm1, %ymm2 4726; AVX512VLVBMI-NEXT: vmovdqa %ymm2, %ymm0 4727; AVX512VLVBMI-NEXT: 
retq 4728; 4729; XOPAVX1-LABEL: shuffle_v32i8_31_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_47_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62: 4730; XOPAVX1: # %bb.0: 4731; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 4732; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm1[15],xmm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 4733; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 4734; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 4735; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 4736; XOPAVX1-NEXT: retq 4737; 4738; XOPAVX2-LABEL: shuffle_v32i8_31_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_47_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62: 4739; XOPAVX2: # %bb.0: 4740; XOPAVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] 4741; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm0[31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30] 4742; XOPAVX2-NEXT: retq 4743 %shuffle = shufflevector <32 x i8> %a0, <32 x i8> %a1, <32 x i32> <i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62> 4744 ret <32 x i8> %shuffle 4745} 4746 4747define <32 x i8> @shuffle_v32i8_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_19_16_17_18_23_20_21_22_27_24_25_26_31_28_29_30(<32 x i8> %a) { 4748; AVX1-LABEL: shuffle_v32i8_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_19_16_17_18_23_20_21_22_27_24_25_26_31_28_29_30: 4749; AVX1: # %bb.0: 4750; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 4751; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14] 4752; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 4753; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 4754; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 4755; AVX1-NEXT: retq 4756; 4757; AVX2-LABEL: 
shuffle_v32i8_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_19_16_17_18_23_20_21_22_27_24_25_26_31_28_29_30: 4758; AVX2: # %bb.0: 4759; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14,19,16,17,18,23,20,21,22,27,24,25,26,31,28,29,30] 4760; AVX2-NEXT: retq 4761; 4762; AVX512VL-LABEL: shuffle_v32i8_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_19_16_17_18_23_20_21_22_27_24_25_26_31_28_29_30: 4763; AVX512VL: # %bb.0: 4764; AVX512VL-NEXT: vprold $8, %ymm0, %ymm0 4765; AVX512VL-NEXT: retq 4766; 4767; XOPAVX1-LABEL: shuffle_v32i8_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_19_16_17_18_23_20_21_22_27_24_25_26_31_28_29_30: 4768; XOPAVX1: # %bb.0: 4769; XOPAVX1-NEXT: vprotd $8, %xmm0, %xmm1 4770; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 4771; XOPAVX1-NEXT: vprotd $8, %xmm0, %xmm0 4772; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 4773; XOPAVX1-NEXT: retq 4774; 4775; XOPAVX2-LABEL: shuffle_v32i8_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_19_16_17_18_23_20_21_22_27_24_25_26_31_28_29_30: 4776; XOPAVX2: # %bb.0: 4777; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14,19,16,17,18,23,20,21,22,27,24,25,26,31,28,29,30] 4778; XOPAVX2-NEXT: retq 4779 %shuffle = shufflevector <32 x i8> %a, <32 x i8> poison, <32 x i32> <i32 3, i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6, i32 11, i32 8, i32 9, i32 10, i32 15, i32 12, i32 13, i32 14, i32 19, i32 16, i32 17, i32 18, i32 23, i32 20, i32 21, i32 22, i32 27, i32 24, i32 25, i32 26, i32 31, i32 28, i32 29, i32 30> 4780 ret <32 x i8> %shuffle 4781} 4782 4783; PR44379 4784define <32 x i8> @shuffle_v32i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_18_19_20_21_22_23_16_17_26_27_28_29_30_31_24_25(<32 x i8> %a) { 4785; AVX1-LABEL: shuffle_v32i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_18_19_20_21_22_23_16_17_26_27_28_29_30_31_24_25: 4786; AVX1: # %bb.0: 4787; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[1,2,3,0,4,5,6,7] 4788; AVX1-NEXT: vpshufhw {{.*#+}} 
xmm1 = xmm1[0,1,2,3,5,6,7,4] 4789; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 4790; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7] 4791; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,7,4] 4792; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 4793; AVX1-NEXT: retq 4794; 4795; AVX2-SLOW-LABEL: shuffle_v32i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_18_19_20_21_22_23_16_17_26_27_28_29_30_31_24_25: 4796; AVX2-SLOW: # %bb.0: 4797; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[1,2,3,0,4,5,6,7,9,10,11,8,12,13,14,15] 4798; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,6,7,4,8,9,10,11,13,14,15,12] 4799; AVX2-SLOW-NEXT: retq 4800; 4801; AVX2-FAST-LABEL: shuffle_v32i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_18_19_20_21_22_23_16_17_26_27_28_29_30_31_24_25: 4802; AVX2-FAST: # %bb.0: 4803; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,0,1,10,11,12,13,14,15,8,9,18,19,20,21,22,23,16,17,26,27,28,29,30,31,24,25] 4804; AVX2-FAST-NEXT: retq 4805; 4806; AVX512VL-LABEL: shuffle_v32i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_18_19_20_21_22_23_16_17_26_27_28_29_30_31_24_25: 4807; AVX512VL: # %bb.0: 4808; AVX512VL-NEXT: vprolq $48, %ymm0, %ymm0 4809; AVX512VL-NEXT: retq 4810; 4811; XOPAVX1-LABEL: shuffle_v32i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_18_19_20_21_22_23_16_17_26_27_28_29_30_31_24_25: 4812; XOPAVX1: # %bb.0: 4813; XOPAVX1-NEXT: vprotq $48, %xmm0, %xmm1 4814; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 4815; XOPAVX1-NEXT: vprotq $48, %xmm0, %xmm0 4816; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 4817; XOPAVX1-NEXT: retq 4818; 4819; XOPAVX2-LABEL: shuffle_v32i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_18_19_20_21_22_23_16_17_26_27_28_29_30_31_24_25: 4820; XOPAVX2: # %bb.0: 4821; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[1,2,3,0,4,5,6,7,9,10,11,8,12,13,14,15] 4822; XOPAVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,6,7,4,8,9,10,11,13,14,15,12] 4823; XOPAVX2-NEXT: retq 4824 %shuffle = shufflevector 
<32 x i8> %a, <32 x i8> poison, <32 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 16, i32 17, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 24, i32 25> 4825 ret <32 x i8> %shuffle 4826} 4827 4828; PR47194 4829define <32 x i8> @shuffle_v32i8_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31(<32 x i8> %a) { 4830; AVX1-LABEL: shuffle_v32i8_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31: 4831; AVX1: # %bb.0: 4832; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 4833; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7] 4834; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,2,2] 4835; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 4836; AVX1-NEXT: retq 4837; 4838; AVX2-SLOW-LABEL: shuffle_v32i8_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31: 4839; AVX2-SLOW: # %bb.0: 4840; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15] 4841; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,2,2,2,6,6,6,6] 4842; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2] 4843; AVX2-SLOW-NEXT: retq 4844; 4845; AVX2-FAST-ALL-LABEL: shuffle_v32i8_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31: 4846; AVX2-FAST-ALL: # %bb.0: 4847; AVX2-FAST-ALL-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15] 4848; AVX2-FAST-ALL-NEXT: vpbroadcastd {{.*#+}} ymm1 = [6,6,6,6,6,6,6,6] 4849; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0 4850; AVX2-FAST-ALL-NEXT: retq 4851; 4852; AVX2-FAST-PERLANE-LABEL: shuffle_v32i8_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31: 4853; AVX2-FAST-PERLANE: # %bb.0: 4854; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,30,31,30,31,30,31,30,31,u,u,u,u,u,u,u,u] 4855; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2] 4856; AVX2-FAST-PERLANE-NEXT: retq 4857; 4858; AVX512VL-LABEL: shuffle_v32i8_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31: 4859; AVX512VL: # %bb.0: 4860; AVX512VL-NEXT: vpbroadcastw {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 4861; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0 4862; AVX512VL-NEXT: retq 4863; 4864; XOPAVX1-LABEL: shuffle_v32i8_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31: 4865; XOPAVX1: # %bb.0: 4866; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 4867; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7] 4868; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,2,2] 4869; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 4870; XOPAVX1-NEXT: retq 4871; 4872; XOPAVX2-LABEL: shuffle_v32i8_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31: 4873; XOPAVX2: # %bb.0: 4874; XOPAVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15] 4875; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,2,2,2,6,6,6,6] 4876; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2] 4877; XOPAVX2-NEXT: retq 4878 %shuffle = shufflevector <32 x i8> %a, <32 x i8> poison, <32 x i32> <i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31> 4879 ret <32 x i8> %shuffle 4880} 4881 4882define <32 x i8> @shuffle_v32i8_00_02_04_06_08_10_12_14_32_34_36_38_40_42_44_46_16_18_20_22_24_26_28_30_48_50_52_54_56_58_60_62(<32 x i8> %a0, <32 x i8> %a1) { 4883; AVX1-LABEL: shuffle_v32i8_00_02_04_06_08_10_12_14_32_34_36_38_40_42_44_46_16_18_20_22_24_26_28_30_48_50_52_54_56_58_60_62: 
4884; AVX1: # %bb.0: 4885; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 4886; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255] 4887; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 4888; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 4889; AVX1-NEXT: vpand %xmm3, %xmm4, %xmm4 4890; AVX1-NEXT: vpackuswb %xmm2, %xmm4, %xmm2 4891; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1 4892; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0 4893; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 4894; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 4895; AVX1-NEXT: retq 4896; 4897; AVX2-LABEL: shuffle_v32i8_00_02_04_06_08_10_12_14_32_34_36_38_40_42_44_46_16_18_20_22_24_26_28_30_48_50_52_54_56_58_60_62: 4898; AVX2: # %bb.0: 4899; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14] 4900; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1 4901; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0 4902; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7] 4903; AVX2-NEXT: retq 4904; 4905; AVX512VLBW-LABEL: shuffle_v32i8_00_02_04_06_08_10_12_14_32_34_36_38_40_42_44_46_16_18_20_22_24_26_28_30_48_50_52_54_56_58_60_62: 4906; AVX512VLBW: # %bb.0: 4907; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} ymm2 = [0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14] 4908; AVX512VLBW-NEXT: vpshufb %ymm2, %ymm1, %ymm1 4909; AVX512VLBW-NEXT: vpshufb %ymm2, %ymm0, %ymm0 4910; AVX512VLBW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7] 4911; AVX512VLBW-NEXT: retq 4912; 4913; AVX512VLVBMI-LABEL: shuffle_v32i8_00_02_04_06_08_10_12_14_32_34_36_38_40_42_44_46_16_18_20_22_24_26_28_30_48_50_52_54_56_58_60_62: 4914; AVX512VLVBMI: # %bb.0: 4915; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,4,6,8,10,12,14,32,34,36,38,40,42,44,46,16,18,20,22,24,26,28,30,48,50,52,54,56,58,60,62] 4916; AVX512VLVBMI-NEXT: vpermt2b %ymm1, %ymm2, %ymm0 4917; AVX512VLVBMI-NEXT: retq 4918; 4919; XOPAVX1-LABEL: 
shuffle_v32i8_00_02_04_06_08_10_12_14_32_34_36_38_40_42_44_46_16_18_20_22_24_26_28_30_48_50_52_54_56_58_60_62: 4920; XOPAVX1: # %bb.0: 4921; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 4922; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 4923; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30] 4924; XOPAVX1-NEXT: vpperm %xmm4, %xmm2, %xmm3, %xmm2 4925; XOPAVX1-NEXT: vpperm %xmm4, %xmm1, %xmm0, %xmm0 4926; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 4927; XOPAVX1-NEXT: retq 4928; 4929; XOPAVX2-LABEL: shuffle_v32i8_00_02_04_06_08_10_12_14_32_34_36_38_40_42_44_46_16_18_20_22_24_26_28_30_48_50_52_54_56_58_60_62: 4930; XOPAVX2: # %bb.0: 4931; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14] 4932; XOPAVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1 4933; XOPAVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0 4934; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7] 4935; XOPAVX2-NEXT: retq 4936 %1 = shufflevector <32 x i8> %a0, <32 x i8> %a1, <32 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 32, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32 62> 4937 ret <32 x i8> %1 4938} 4939 4940define <32 x i8> @shuffle_v32i8_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62(<32 x i8> %a0, <32 x i8> %a1) { 4941; AVX1-LABEL: shuffle_v32i8_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62: 4942; AVX1: # %bb.0: 4943; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 4944; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255] 4945; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 4946; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1 4947; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 4948; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 4949; AVX1-NEXT: 
vpand %xmm3, %xmm2, %xmm2 4950; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0 4951; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 4952; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 4953; AVX1-NEXT: retq 4954; 4955; AVX2-LABEL: shuffle_v32i8_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62: 4956; AVX2: # %bb.0: 4957; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14] 4958; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1 4959; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0 4960; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7] 4961; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 4962; AVX2-NEXT: retq 4963; 4964; AVX512VL-LABEL: shuffle_v32i8_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62: 4965; AVX512VL: # %bb.0: 4966; AVX512VL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 4967; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 4968; AVX512VL-NEXT: vpmovwb %zmm0, %ymm0 4969; AVX512VL-NEXT: retq 4970; 4971; XOPAVX1-LABEL: shuffle_v32i8_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62: 4972; XOPAVX1: # %bb.0: 4973; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 4974; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30] 4975; XOPAVX1-NEXT: vpperm %xmm3, %xmm2, %xmm1, %xmm1 4976; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 4977; XOPAVX1-NEXT: vpperm %xmm3, %xmm2, %xmm0, %xmm0 4978; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 4979; XOPAVX1-NEXT: retq 4980; 4981; XOPAVX2-LABEL: shuffle_v32i8_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62: 4982; XOPAVX2: # %bb.0: 4983; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14] 4984; XOPAVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1 4985; XOPAVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0 
4986; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7] 4987; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 4988; XOPAVX2-NEXT: retq 4989 %1 = shufflevector <32 x i8> %a0, <32 x i8> %a1, <32 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 32, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32 62> 4990 ret <32 x i8> %1 4991} 4992 4993define <32 x i8> @shuffle_v32i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62(<16 x i16> %a0, <16 x i16> %a1) { 4994; AVX1-LABEL: shuffle_v32i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62: 4995; AVX1: # %bb.0: 4996; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 4997; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2 4998; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 4999; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 5000; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 5001; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2 5002; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1 5003; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 5004; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 5005; AVX1-NEXT: retq 5006; 5007; AVX2-LABEL: shuffle_v32i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62: 5008; AVX2: # %bb.0: 5009; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 5010; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1 5011; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 5012; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 5013; AVX2-NEXT: retq 5014; 5015; AVX512VL-LABEL: shuffle_v32i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62: 5016; AVX512VL: # %bb.0: 5017; AVX512VL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 5018; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 5019; AVX512VL-NEXT: vpsrlw $8, %zmm0, %zmm0 5020; AVX512VL-NEXT: 
vpmovwb %zmm0, %ymm0 5021; AVX512VL-NEXT: retq 5022; 5023; XOPAVX1-LABEL: shuffle_v32i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62: 5024; XOPAVX1: # %bb.0: 5025; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 5026; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 5027; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31] 5028; XOPAVX1-NEXT: vpperm %xmm4, %xmm3, %xmm1, %xmm1 5029; XOPAVX1-NEXT: vpperm %xmm4, %xmm2, %xmm0, %xmm0 5030; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 5031; XOPAVX1-NEXT: retq 5032; 5033; XOPAVX2-LABEL: shuffle_v32i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62: 5034; XOPAVX2: # %bb.0: 5035; XOPAVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 5036; XOPAVX2-NEXT: vpsrlw $8, %ymm1, %ymm1 5037; XOPAVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 5038; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 5039; XOPAVX2-NEXT: retq 5040 %1 = lshr <16 x i16> %a0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> 5041 %2 = lshr <16 x i16> %a1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> 5042 %3 = bitcast <16 x i16> %1 to <32 x i8> 5043 %4 = bitcast <16 x i16> %2 to <32 x i8> 5044 %5 = shufflevector <32 x i8> %3, <32 x i8> %4, <32 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 32, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32 62> 5045 ret <32 x i8> %5 5046} 5047 5048define <4 x i64> @PR28136(<32 x i8> %a0, <32 x i8> %a1) { 5049; AVX1-LABEL: PR28136: 5050; AVX1: # %bb.0: 5051; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 5052; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = 
xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 5053; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = zero,xmm1[8],zero,xmm1[10],zero,xmm1[12],zero,xmm1[14],zero,xmm1[9],zero,xmm1[11],zero,xmm1[13],zero,xmm1[15] 5054; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 5055; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] 5056; AVX1-NEXT: vpshufb {{.*#+}} xmm3 = xmm0[8],zero,xmm0[10],zero,xmm0[12],zero,xmm0[14],zero,xmm0[9],zero,xmm0[11],zero,xmm0[13],zero,xmm0[15],zero 5057; AVX1-NEXT: vpor %xmm2, %xmm3, %xmm2 5058; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = zero,xmm1[0],zero,xmm1[2],zero,xmm1[4],zero,xmm1[6],zero,xmm1[1],zero,xmm1[3],zero,xmm1[5],zero,xmm1[7] 5059; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0],zero,xmm0[2],zero,xmm0[4],zero,xmm0[6],zero,xmm0[1],zero,xmm0[3],zero,xmm0[5],zero,xmm0[7],zero 5060; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 5061; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 5062; AVX1-NEXT: retq 5063; 5064; AVX2OR512VL-LABEL: PR28136: 5065; AVX2OR512VL: # %bb.0: 5066; AVX2OR512VL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] 5067; AVX2OR512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 5068; AVX2OR512VL-NEXT: retq 5069; 5070; XOPAVX1-LABEL: PR28136: 5071; XOPAVX1: # %bb.0: 5072; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 5073; XOPAVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 5074; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 5075; XOPAVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = 
xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 5076; XOPAVX1-NEXT: vpperm {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[10],xmm1[10],xmm0[12],xmm1[12],xmm0[14],xmm1[14],xmm0[9],xmm1[9],xmm0[11],xmm1[11],xmm0[13],xmm1[13],xmm0[15],xmm1[15] 5077; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2],xmm1[2],xmm0[4],xmm1[4],xmm0[6],xmm1[6],xmm0[1],xmm1[1],xmm0[3],xmm1[3],xmm0[5],xmm1[5],xmm0[7],xmm1[7] 5078; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 5079; XOPAVX1-NEXT: retq 5080; 5081; XOPAVX2-LABEL: PR28136: 5082; XOPAVX2: # %bb.0: 5083; XOPAVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] 5084; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 5085; XOPAVX2-NEXT: retq 5086 %1 = shufflevector <32 x i8> %a0, <32 x i8> %a1, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50,i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55> 5087 %2 = bitcast <32 x i8> %1 to <4 x i64> 5088 %3 = shufflevector <4 x i64> %2, <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3> 5089 ret <4 x i64> %3 5090} 5091 5092define <32 x i8> @PR47262(<4 x i64> %a0) { 5093; AVX1-LABEL: PR47262: 5094; AVX1: # %bb.0: 5095; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 5096; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] 5097; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,8,1,9,2,10,3,11,4,12,5,13,6,14,7,15] 5098; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2 5099; AVX1-NEXT: vpunpcklbw {{.*#+}} 
xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 5100; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0 5101; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 5102; AVX1-NEXT: retq 5103; 5104; AVX2-LABEL: PR47262: 5105; AVX2: # %bb.0: 5106; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 5107; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12,1,5,9,13,2,6,10,14,3,7,11,15,16,20,24,28,17,21,25,29,18,22,26,30,19,23,27,31] 5108; AVX2-NEXT: retq 5109; 5110; AVX512VLBW-LABEL: PR47262: 5111; AVX512VLBW: # %bb.0: 5112; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 5113; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12,1,5,9,13,2,6,10,14,3,7,11,15,16,20,24,28,17,21,25,29,18,22,26,30,19,23,27,31] 5114; AVX512VLBW-NEXT: retq 5115; 5116; AVX512VLVBMI-LABEL: PR47262: 5117; AVX512VLVBMI: # %bb.0: 5118; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} ymm1 = [0,4,16,20,1,5,17,21,2,6,18,22,3,7,19,23,8,12,24,28,9,13,25,29,10,14,26,30,11,15,27,31] 5119; AVX512VLVBMI-NEXT: vpermb %ymm0, %ymm1, %ymm0 5120; AVX512VLVBMI-NEXT: retq 5121; 5122; XOPAVX1-LABEL: PR47262: 5123; XOPAVX1: # %bb.0: 5124; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 5125; XOPAVX1-NEXT: vpperm {{.*#+}} xmm2 = xmm0[8,12],xmm1[8,12],xmm0[9,13],xmm1[9,13],xmm0[10,14],xmm1[10,14],xmm0[11,15],xmm1[11,15] 5126; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,4],xmm1[0,4],xmm0[1,5],xmm1[1,5],xmm0[2,6],xmm1[2,6],xmm0[3,7],xmm1[3,7] 5127; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 5128; XOPAVX1-NEXT: retq 5129; 5130; XOPAVX2-LABEL: PR47262: 5131; XOPAVX2: # %bb.0: 5132; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 5133; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12,1,5,9,13,2,6,10,14,3,7,11,15,16,20,24,28,17,21,25,29,18,22,26,30,19,23,27,31] 5134; XOPAVX2-NEXT: retq 5135 %t1 = shufflevector <4 x i64> %a0, <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3> 5136 %t2 = bitcast <4 x i64> %t1 to <32 x i8> 5137 %t3 = shufflevector <32 
x i8> %t2, <32 x i8> poison, <32 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15, i32 16, i32 20, i32 24, i32 28, i32 17, i32 21, i32 25, i32 29, i32 18, i32 22, i32 26, i32 30, i32 19, i32 23, i32 27, i32 31> 5138 ret <32 x i8> %t3 5139} 5140 5141define <32 x i8> @PR55066(<32 x i8> %a0) { 5142; AVX1-LABEL: PR55066: 5143; AVX1: # %bb.0: 5144; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 5145; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [0,4,8,12,0,4,8,12,0,4,8,12,0,4,8,12] 5146; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 5147; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 5148; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 5149; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 5150; AVX1-NEXT: retq 5151; 5152; AVX2-LABEL: PR55066: 5153; AVX2: # %bb.0: 5154; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u,16,20,24,28,u,u,u,u,u,u,u,u,u,u,u,u] 5155; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [0,4,0,4,0,4,0,4] 5156; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 5157; AVX2-NEXT: retq 5158; 5159; AVX512VLBW-LABEL: PR55066: 5160; AVX512VLBW: # %bb.0: 5161; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u,16,20,24,28,u,u,u,u,u,u,u,u,u,u,u,u] 5162; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} ymm1 = [0,4,0,4,0,4,0,4] 5163; AVX512VLBW-NEXT: vpermd %ymm0, %ymm1, %ymm0 5164; AVX512VLBW-NEXT: retq 5165; 5166; AVX512VLVBMI-LABEL: PR55066: 5167; AVX512VLVBMI: # %bb.0: 5168; AVX512VLVBMI-NEXT: vpbroadcastq {{.*#+}} ymm1 = [0,4,8,12,16,20,24,28,0,4,8,12,16,20,24,28,0,4,8,12,16,20,24,28,0,4,8,12,16,20,24,28] 5169; AVX512VLVBMI-NEXT: vpermb %ymm0, %ymm1, %ymm0 5170; AVX512VLVBMI-NEXT: retq 5171; 5172; XOPAVX1-LABEL: PR55066: 5173; XOPAVX1: # %bb.0: 5174; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 5175; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,4,8,12],xmm1[0,4,8,12],xmm0[0,4,8,12],xmm1[0,4,8,12] 5176; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 5177; 
XOPAVX1-NEXT: retq 5178; 5179; XOPAVX2-LABEL: PR55066: 5180; XOPAVX2: # %bb.0: 5181; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u,16,20,24,28,u,u,u,u,u,u,u,u,u,u,u,u] 5182; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [0,4,0,4,0,4,0,4] 5183; XOPAVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 5184; XOPAVX2-NEXT: retq 5185 %shuffle = shufflevector <32 x i8> %a0, <32 x i8> poison, <32 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28> 5186 ret <32 x i8> %shuffle 5187} 5188 5189define <4 x i64> @PR66150(ptr %b) { 5190; AVX1-LABEL: PR66150: 5191; AVX1: # %bb.0: 5192; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 5193; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 5194; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7] 5195; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7] 5196; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 5197; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5] 5198; AVX1-NEXT: retq 5199; 5200; AVX2OR512VL-LABEL: PR66150: 5201; AVX2OR512VL: # %bb.0: 5202; AVX2OR512VL-NEXT: vpbroadcastd (%rdi), %ymm0 5203; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19] 5204; AVX2OR512VL-NEXT: retq 5205; 5206; XOPAVX1-LABEL: PR66150: 5207; XOPAVX1: # %bb.0: 5208; XOPAVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 5209; XOPAVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 5210; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7] 5211; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7] 5212; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 5213; XOPAVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5] 5214; XOPAVX1-NEXT: retq 5215; 5216; XOPAVX2-LABEL: PR66150: 
5217; XOPAVX2: # %bb.0: 5218; XOPAVX2-NEXT: vpbroadcastd (%rdi), %ymm0 5219; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19] 5220; XOPAVX2-NEXT: retq 5221 %tmp1 = load i32, ptr %b, align 4 5222 %tmp2 = insertelement <8 x i32> poison, i32 %tmp1, i64 0 5223 %tmp3 = shufflevector <8 x i32> %tmp2, <8 x i32> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 0, i32 poison, i32 poison, i32 poison> 5224 %tmp4 = bitcast <8 x i32> %tmp3 to <32 x i8> 5225 %tmp5 = shufflevector <32 x i8> %tmp4, <32 x i8> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19> 5226 %tmp6 = bitcast <32 x i8> %tmp5 to <4 x i64> 5227 ret <4 x i64> %tmp6 5228} 5229 5230define <64 x i8> @PR103564(<32 x i8> %a0, <32 x i8> %a1) { 5231; AVX1-LABEL: PR103564: 5232; AVX1: # %bb.0: 5233; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] 5234; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 5235; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 5236; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 5237; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 5238; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] 5239; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 5240; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm1 5241; AVX1-NEXT: vmovaps %ymm2, %ymm0 
5242; AVX1-NEXT: retq 5243; 5244; AVX2-LABEL: PR103564: 5245; AVX2: # %bb.0: 5246; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] 5247; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] 5248; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm1[0,1],ymm2[0,1] 5249; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm1[2,3],ymm2[2,3] 5250; AVX2-NEXT: retq 5251; 5252; AVX512VLBW-LABEL: PR103564: 5253; AVX512VLBW: # %bb.0: 5254; AVX512VLBW-NEXT: vextracti128 $1, %ymm1, %xmm2 5255; AVX512VLBW-NEXT: vextracti128 $1, %ymm0, %xmm3 5256; AVX512VLBW-NEXT: vpunpckhbw {{.*#+}} xmm4 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15] 5257; AVX512VLBW-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 5258; AVX512VLBW-NEXT: vinserti128 $1, %xmm4, %ymm2, %ymm2 5259; AVX512VLBW-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] 5260; AVX512VLBW-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 5261; AVX512VLBW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 5262; AVX512VLBW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 5263; AVX512VLBW-NEXT: retq 5264; 5265; 
AVX512VLVBMI-LABEL: PR103564: 5266; AVX512VLVBMI: # %bb.0: 5267; AVX512VLVBMI-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 5268; AVX512VLVBMI-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 5269; AVX512VLVBMI-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,64,1,65,2,66,3,67,4,68,5,69,6,70,7,71,8,72,9,73,10,74,11,75,12,76,13,77,14,78,15,79,16,80,17,81,18,82,19,83,20,84,21,85,22,86,23,87,24,88,25,89,26,90,27,91,28,92,29,93,30,94,31,95] 5270; AVX512VLVBMI-NEXT: vpermt2b %zmm1, %zmm2, %zmm0 5271; AVX512VLVBMI-NEXT: retq 5272; 5273; XOPAVX1-LABEL: PR103564: 5274; XOPAVX1: # %bb.0: 5275; XOPAVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] 5276; XOPAVX1-NEXT: vpunpcklbw {{.*#+}} xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 5277; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 5278; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 5279; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 5280; XOPAVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] 5281; XOPAVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 5282; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm1 5283; XOPAVX1-NEXT: vmovaps %ymm2, %ymm0 5284; XOPAVX1-NEXT: retq 5285; 5286; XOPAVX2-LABEL: PR103564: 5287; XOPAVX2: # %bb.0: 5288; XOPAVX2-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] 5289; XOPAVX2-NEXT: vpunpcklbw 
{{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] 5290; XOPAVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm1[0,1],ymm2[0,1] 5291; XOPAVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm1[2,3],ymm2[2,3] 5292; XOPAVX2-NEXT: retq 5293 %r = shufflevector <32 x i8> %a0, <32 x i8> %a1, <64 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63> 5294 ret <64 x i8> %r 5295} 5296 5297define <32 x i8> @insert_dup_mem_v32i8_i32(ptr %ptr) { 5298; AVX1-LABEL: insert_dup_mem_v32i8_i32: 5299; AVX1: # %bb.0: 5300; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 5301; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 5302; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 5303; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 5304; AVX1-NEXT: retq 5305; 5306; AVX2OR512VL-LABEL: insert_dup_mem_v32i8_i32: 5307; AVX2OR512VL: # %bb.0: 5308; AVX2OR512VL-NEXT: vpbroadcastb (%rdi), %ymm0 5309; AVX2OR512VL-NEXT: retq 5310; 5311; XOPAVX1-LABEL: insert_dup_mem_v32i8_i32: 5312; XOPAVX1: # %bb.0: 5313; XOPAVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 5314; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 5315; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 5316; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 5317; XOPAVX1-NEXT: retq 5318; 5319; XOPAVX2-LABEL: insert_dup_mem_v32i8_i32: 5320; XOPAVX2: # %bb.0: 5321; XOPAVX2-NEXT: vpbroadcastb (%rdi), 
%ymm0 5322; XOPAVX2-NEXT: retq 5323 %tmp = load i32, ptr %ptr, align 4 5324 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0 5325 %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8> 5326 %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> poison, <32 x i32> zeroinitializer 5327 ret <32 x i8> %tmp3 5328} 5329 ; Splat of byte 0 of a sign-extended i8 load: the all-zero shuffle mask selects only the low byte of the sext result, so the AVX2-capable runs expect a single vpbroadcastb from memory and the AVX1-only runs expect a zero-extending byte load followed by vpshufb with a zero mask. 5330define <32 x i8> @insert_dup_mem_v32i8_sext_i8(ptr %ptr) { 5331; AVX1-LABEL: insert_dup_mem_v32i8_sext_i8: 5332; AVX1: # %bb.0: 5333; AVX1-NEXT: movzbl (%rdi), %eax 5334; AVX1-NEXT: vmovd %eax, %xmm0 5335; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 5336; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 5337; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 5338; AVX1-NEXT: retq 5339; 5340; AVX2OR512VL-LABEL: insert_dup_mem_v32i8_sext_i8: 5341; AVX2OR512VL: # %bb.0: 5342; AVX2OR512VL-NEXT: vpbroadcastb (%rdi), %ymm0 5343; AVX2OR512VL-NEXT: retq 5344; 5345; XOPAVX1-LABEL: insert_dup_mem_v32i8_sext_i8: 5346; XOPAVX1: # %bb.0: 5347; XOPAVX1-NEXT: movzbl (%rdi), %eax 5348; XOPAVX1-NEXT: vmovd %eax, %xmm0 5349; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 5350; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 5351; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 5352; XOPAVX1-NEXT: retq 5353; 5354; XOPAVX2-LABEL: insert_dup_mem_v32i8_sext_i8: 5355; XOPAVX2: # %bb.0: 5356; XOPAVX2-NEXT: vpbroadcastb (%rdi), %ymm0 5357; XOPAVX2-NEXT: retq 5358 %tmp = load i8, ptr %ptr, align 1 5359 %tmp1 = sext i8 %tmp to i32 5360 %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0 5361 %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8> 5362 %tmp4 = shufflevector <16 x i8> %tmp3, <16 x i8> poison, <32 x i32> zeroinitializer 5363 ret <32 x i8> %tmp4 5364} 5365 5366define <32 x i8> @insert_dup_elt1_mem_v32i8_i32(ptr %ptr) { 5367; AVX1-LABEL: insert_dup_elt1_mem_v32i8_i32: 5368; AVX1: # %bb.0: 5369; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 5370; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] 5371; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 5372; 
AVX1-NEXT: retq 5373; 5374; AVX2OR512VL-LABEL: insert_dup_elt1_mem_v32i8_i32: 5375; AVX2OR512VL: # %bb.0: 5376; AVX2OR512VL-NEXT: vpbroadcastb 1(%rdi), %ymm0 5377; AVX2OR512VL-NEXT: retq 5378; 5379; XOPAVX1-LABEL: insert_dup_elt1_mem_v32i8_i32: 5380; XOPAVX1: # %bb.0: 5381; XOPAVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 5382; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] 5383; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 5384; XOPAVX1-NEXT: retq 5385; 5386; XOPAVX2-LABEL: insert_dup_elt1_mem_v32i8_i32: 5387; XOPAVX2: # %bb.0: 5388; XOPAVX2-NEXT: vpbroadcastb 1(%rdi), %ymm0 5389; XOPAVX2-NEXT: retq 5390 %tmp = load i32, ptr %ptr, align 4 5391 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0 5392 %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8> 5393 %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> poison, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 5394 ret <32 x i8> %tmp3 5395} 5396 5397define <32 x i8> @insert_dup_elt3_mem_v32i8_i32(ptr %ptr) { 5398; AVX1-LABEL: insert_dup_elt3_mem_v32i8_i32: 5399; AVX1: # %bb.0: 5400; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 5401; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3] 5402; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 5403; AVX1-NEXT: retq 5404; 5405; AVX2OR512VL-LABEL: insert_dup_elt3_mem_v32i8_i32: 5406; AVX2OR512VL: # %bb.0: 5407; AVX2OR512VL-NEXT: vpbroadcastb 3(%rdi), %ymm0 5408; AVX2OR512VL-NEXT: retq 5409; 5410; XOPAVX1-LABEL: insert_dup_elt3_mem_v32i8_i32: 5411; XOPAVX1: # %bb.0: 5412; XOPAVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 5413; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3] 5414; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 5415; XOPAVX1-NEXT: retq 5416; 5417; 
XOPAVX2-LABEL: insert_dup_elt3_mem_v32i8_i32: 5418; XOPAVX2: # %bb.0: 5419; XOPAVX2-NEXT: vpbroadcastb 3(%rdi), %ymm0 5420; XOPAVX2-NEXT: retq 5421 %tmp = load i32, ptr %ptr, align 4 5422 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0 5423 %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8> 5424 %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> poison, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 5425 ret <32 x i8> %tmp3 5426} 5427 5428define <32 x i8> @insert_dup_elt1_mem_v32i8_sext_i8(ptr %ptr) { 5429; AVX1-LABEL: insert_dup_elt1_mem_v32i8_sext_i8: 5430; AVX1: # %bb.0: 5431; AVX1-NEXT: movsbl (%rdi), %eax 5432; AVX1-NEXT: vmovd %eax, %xmm0 5433; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] 5434; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 5435; AVX1-NEXT: retq 5436; 5437; AVX2-LABEL: insert_dup_elt1_mem_v32i8_sext_i8: 5438; AVX2: # %bb.0: 5439; AVX2-NEXT: movsbl (%rdi), %eax 5440; AVX2-NEXT: shrl $8, %eax 5441; AVX2-NEXT: vmovd %eax, %xmm0 5442; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0 5443; AVX2-NEXT: retq 5444; 5445; AVX512VL-LABEL: insert_dup_elt1_mem_v32i8_sext_i8: 5446; AVX512VL: # %bb.0: 5447; AVX512VL-NEXT: movsbl (%rdi), %eax 5448; AVX512VL-NEXT: shrl $8, %eax 5449; AVX512VL-NEXT: vpbroadcastb %eax, %ymm0 5450; AVX512VL-NEXT: retq 5451; 5452; XOPAVX1-LABEL: insert_dup_elt1_mem_v32i8_sext_i8: 5453; XOPAVX1: # %bb.0: 5454; XOPAVX1-NEXT: movsbl (%rdi), %eax 5455; XOPAVX1-NEXT: vmovd %eax, %xmm0 5456; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] 5457; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 5458; XOPAVX1-NEXT: retq 5459; 5460; XOPAVX2-LABEL: insert_dup_elt1_mem_v32i8_sext_i8: 5461; XOPAVX2: # %bb.0: 5462; XOPAVX2-NEXT: movsbl (%rdi), %eax 5463; XOPAVX2-NEXT: shrl $8, %eax 5464; XOPAVX2-NEXT: 
vmovd %eax, %xmm0 5465; XOPAVX2-NEXT: vpbroadcastb %xmm0, %ymm0 5466; XOPAVX2-NEXT: retq 5467 %tmp = load i8, ptr %ptr, align 1 5468 %tmp1 = sext i8 %tmp to i32 5469 %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0 5470 %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8> 5471 %tmp4 = shufflevector <16 x i8> %tmp3, <16 x i8> poison, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 5472 ret <32 x i8> %tmp4 5473} 5474 5475define <32 x i8> @zeroable_src_to_zext(<32 x i8> %a0) { 5476; AVX1-LABEL: zeroable_src_to_zext: 5477; AVX1: # %bb.0: 5478; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 5479; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 5480; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 5481; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 5482; AVX1-NEXT: retq 5483; 5484; AVX2OR512VL-LABEL: zeroable_src_to_zext: 5485; AVX2OR512VL: # %bb.0: 5486; AVX2OR512VL-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 5487; AVX2OR512VL-NEXT: retq 5488; 5489; XOPAVX1-LABEL: zeroable_src_to_zext: 5490; XOPAVX1: # %bb.0: 5491; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 5492; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 5493; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 5494; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 5495; XOPAVX1-NEXT: retq 5496; 5497; XOPAVX2-LABEL: zeroable_src_to_zext: 5498; XOPAVX2: # %bb.0: 5499; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 5500; XOPAVX2-NEXT: retq 5501 %1 = shufflevector <32 x i8> %a0, <32 x i8> poison, <32 x i32> 
<i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 5502 %2 = shufflevector <32 x i8> %1, <32 x i8> <i8 0, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 0, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <32 x i32> <i32 8, i32 9, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 10, i32 11, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 20, i32 21, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 22, i32 23, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48> 5503 ret <32 x i8> %2 5504} 5505 5506define <32 x i8> @unpckh_v32i8(<32 x i8> %x, <32 x i8> %y) { 5507; AVX1-LABEL: unpckh_v32i8: 5508; AVX1: # %bb.0: 5509; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 5510; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] 5511; AVX1-NEXT: retq 5512; 5513; AVX2OR512VL-LABEL: unpckh_v32i8: 5514; AVX2OR512VL: # %bb.0: 5515; AVX2OR512VL-NEXT: vextracti128 $1, %ymm1, %xmm1 5516; AVX2OR512VL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] 5517; AVX2OR512VL-NEXT: retq 5518; 5519; XOPAVX1-LABEL: unpckh_v32i8: 5520; XOPAVX1: # %bb.0: 5521; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 5522; XOPAVX1-NEXT: vpunpckhbw {{.*#+}} xmm0 = 
xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] 5523; XOPAVX1-NEXT: retq 5524; 5525; XOPAVX2-LABEL: unpckh_v32i8: 5526; XOPAVX2: # %bb.0: 5527; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm1 5528; XOPAVX2-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] 5529; XOPAVX2-NEXT: retq 5530 %unpckh = shufflevector <32 x i8> %x, <32 x i8> %y, <32 x i32> <i32 8, i32 56, i32 9, i32 57, i32 10, i32 58, i32 11, i32 59, i32 12, i32 60, i32 13, i32 61, i32 14, i32 62, i32 15, i32 63, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 5531 ret <32 x i8> %unpckh 5532} 5533 5534define <32 x i8> @shuffle_v16i16_zextinreg_to_v8i32(<32 x i8> %a) { 5535; AVX1-LABEL: shuffle_v16i16_zextinreg_to_v8i32: 5536; AVX1: # %bb.0: 5537; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 5538; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 5539; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 5540; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 5541; AVX1-NEXT: retq 5542; 5543; AVX2OR512VL-LABEL: shuffle_v16i16_zextinreg_to_v8i32: 5544; AVX2OR512VL: # %bb.0: 5545; AVX2OR512VL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 5546; AVX2OR512VL-NEXT: retq 5547; 5548; XOPAVX1-LABEL: shuffle_v16i16_zextinreg_to_v8i32: 5549; XOPAVX1: # %bb.0: 5550; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 5551; XOPAVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 5552; XOPAVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 5553; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 5554; XOPAVX1-NEXT: retq 5555; 5556; XOPAVX2-LABEL: shuffle_v16i16_zextinreg_to_v8i32: 5557; XOPAVX2: # %bb.0: 5558; XOPAVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 5559; XOPAVX2-NEXT: retq 5560 %b = shufflevector <32 x i8> %a, <32 x i8> <i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 0, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <32 x i32> <i32 0, i32 1, i32 42, i32 42, i32 2, i32 3, i32 42, i32 42, i32 4, i32 5, i32 42, i32 42, i32 6, i32 7, i32 42, i32 42, i32 8, i32 9, i32 42, i32 42, i32 10, i32 11, i32 42, i32 42, i32 12, i32 13, i32 42, i32 42, i32 14, i32 15, i32 42, i32 42> 5561 ret <32 x i8> %b 5562} 5563