1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=ALL,SSE,SSE2 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=ALL,SSE,SSSE3 4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=ALL,SSE,SSE41 5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=ALL,AVX,AVX1OR2,AVX1 6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=ALL,AVX,AVX1OR2,AVX2OR512VL,AVX2,AVX2-SLOW 7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX,AVX1OR2,AVX2OR512VL,AVX2,AVX2-FAST 8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX,AVX1OR2,AVX2OR512VL,AVX2,AVX2-FAST 9; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX,AVX2OR512VL,AVX512VL,AVX512VLBW 10; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX,AVX2OR512VL,AVX512VL,AVX512VLBW 11; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+avx512vbmi,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX,AVX2OR512VL,AVX512VL,AVX512VLVBMI 12; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+avx512vbmi,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX,AVX2OR512VL,AVX512VL,AVX512VLVBMI 13; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefixes=ALL,AVX,AVX1OR2,XOP,XOPAVX1 14; RUN: llc < %s 
-mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=ALL,AVX,AVX1OR2,XOP,XOPAVX2 15 16define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i8> %a, <16 x i8> %b) { 17; SSE2-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 18; SSE2: # %bb.0: 19; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 20; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 21; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 22; SSE2-NEXT: retq 23; 24; SSSE3-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 25; SSSE3: # %bb.0: 26; SSSE3-NEXT: pxor %xmm1, %xmm1 27; SSSE3-NEXT: pshufb %xmm1, %xmm0 28; SSSE3-NEXT: retq 29; 30; SSE41-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 31; SSE41: # %bb.0: 32; SSE41-NEXT: pxor %xmm1, %xmm1 33; SSE41-NEXT: pshufb %xmm1, %xmm0 34; SSE41-NEXT: retq 35; 36; AVX1-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 37; AVX1: # %bb.0: 38; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 39; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 40; AVX1-NEXT: retq 41; 42; AVX2OR512VL-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 43; AVX2OR512VL: # %bb.0: 44; AVX2OR512VL-NEXT: vpbroadcastb %xmm0, %xmm0 45; AVX2OR512VL-NEXT: retq 46; 47; XOPAVX1-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 48; XOPAVX1: # %bb.0: 49; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 50; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 51; XOPAVX1-NEXT: retq 52; 53; XOPAVX2-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 54; XOPAVX2: # %bb.0: 55; XOPAVX2-NEXT: vpbroadcastb %xmm0, %xmm0 56; XOPAVX2-NEXT: retq 57 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 58 ret <16 x i8> %shuffle 59} 60 61define <16 x i8> 
@shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01(<16 x i8> %a, <16 x i8> %b) { 62; SSE2-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01: 63; SSE2: # %bb.0: 64; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 65; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7] 66; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] 67; SSE2-NEXT: retq 68; 69; SSSE3-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01: 70; SSSE3: # %bb.0: 71; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1] 72; SSSE3-NEXT: retq 73; 74; SSE41-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01: 75; SSE41: # %bb.0: 76; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1] 77; SSE41-NEXT: retq 78; 79; AVX-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01: 80; AVX: # %bb.0: 81; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1] 82; AVX-NEXT: retq 83 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 84 ret <16 x i8> %shuffle 85} 86 87define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08(<16 x i8> %a, <16 x i8> %b) { 88; SSE2-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08: 89; SSE2: # %bb.0: 90; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 91; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 92; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 93; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7] 94; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] 95; SSE2-NEXT: retq 96; 97; SSSE3-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08: 98; SSSE3: # %bb.0: 99; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8] 100; SSSE3-NEXT: retq 101; 102; SSE41-LABEL: 
shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08: 103; SSE41: # %bb.0: 104; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8] 105; SSE41-NEXT: retq 106; 107; AVX-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08: 108; AVX: # %bb.0: 109; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8] 110; AVX-NEXT: retq 111 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> 112 ret <16 x i8> %shuffle 113} 114 115define <16 x i8> @shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03(<16 x i8> %a, <16 x i8> %b) { 116; SSE-LABEL: shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03: 117; SSE: # %bb.0: 118; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 119; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 120; SSE-NEXT: retq 121; 122; AVX1-LABEL: shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03: 123; AVX1: # %bb.0: 124; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 125; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 126; AVX1-NEXT: retq 127; 128; AVX2-SLOW-LABEL: shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03: 129; AVX2-SLOW: # %bb.0: 130; AVX2-SLOW-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 131; AVX2-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 132; AVX2-SLOW-NEXT: retq 133; 134; AVX2-FAST-LABEL: shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03: 135; AVX2-FAST: # %bb.0: 136; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3] 137; AVX2-FAST-NEXT: retq 138; 139; AVX512VL-LABEL: shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03: 140; AVX512VL: # %bb.0: 141; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3] 142; AVX512VL-NEXT: retq 143; 144; 
XOP-LABEL: shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03: 145; XOP: # %bb.0: 146; XOP-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 147; XOP-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 148; XOP-NEXT: retq 149 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3> 150 ret <16 x i8> %shuffle 151} 152 153define <16 x i8> @shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07(<16 x i8> %a, <16 x i8> %b) { 154; SSE-LABEL: shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07: 155; SSE: # %bb.0: 156; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 157; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7] 158; SSE-NEXT: retq 159; 160; AVX1-LABEL: shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07: 161; AVX1: # %bb.0: 162; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 163; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7] 164; AVX1-NEXT: retq 165; 166; AVX2-SLOW-LABEL: shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07: 167; AVX2-SLOW: # %bb.0: 168; AVX2-SLOW-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 169; AVX2-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7] 170; AVX2-SLOW-NEXT: retq 171; 172; AVX2-FAST-LABEL: shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07: 173; AVX2-FAST: # %bb.0: 174; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,4,4,4,5,5,5,5,6,6,6,6,7,7,7,7] 175; AVX2-FAST-NEXT: retq 176; 177; AVX512VL-LABEL: shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07: 178; AVX512VL: # %bb.0: 179; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,4,4,4,5,5,5,5,6,6,6,6,7,7,7,7] 180; AVX512VL-NEXT: retq 181; 182; XOP-LABEL: shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07: 183; XOP: # %bb.0: 184; XOP-NEXT: vpunpcklbw {{.*#+}} 
xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 185; XOP-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7] 186; XOP-NEXT: retq 187 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7> 188 ret <16 x i8> %shuffle 189} 190 191define <16 x i8> @shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12(<16 x i8> %a, <16 x i8> %b) { 192; SSE2-LABEL: shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12: 193; SSE2: # %bb.0: 194; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 195; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] 196; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 197; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 198; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7] 199; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,6] 200; SSE2-NEXT: retq 201; 202; SSSE3-LABEL: shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12: 203; SSSE3: # %bb.0: 204; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12] 205; SSSE3-NEXT: retq 206; 207; SSE41-LABEL: shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12: 208; SSE41: # %bb.0: 209; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12] 210; SSE41-NEXT: retq 211; 212; AVX-LABEL: shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12: 213; AVX: # %bb.0: 214; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12] 215; AVX-NEXT: retq 216 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12> 217 ret <16 x i8> %shuffle 218} 219 220define <16 x i8> @shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07(<16 x i8> %a, <16 x i8> %b) { 221; SSE-LABEL: shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07: 
222; SSE: # %bb.0: 223; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 224; SSE-NEXT: retq 225; 226; AVX-LABEL: shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07: 227; AVX: # %bb.0: 228; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 229; AVX-NEXT: retq 230 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7> 231 ret <16 x i8> %shuffle 232} 233 234define <16 x i8> @shuffle_v16i8_0101010101010101(<16 x i8> %a, <16 x i8> %b) { 235; SSE-LABEL: shuffle_v16i8_0101010101010101: 236; SSE: # %bb.0: 237; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 238; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 239; SSE-NEXT: retq 240; 241; AVX1-LABEL: shuffle_v16i8_0101010101010101: 242; AVX1: # %bb.0: 243; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 244; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 245; AVX1-NEXT: retq 246; 247; AVX2OR512VL-LABEL: shuffle_v16i8_0101010101010101: 248; AVX2OR512VL: # %bb.0: 249; AVX2OR512VL-NEXT: vpbroadcastw %xmm0, %xmm0 250; AVX2OR512VL-NEXT: retq 251; 252; XOPAVX1-LABEL: shuffle_v16i8_0101010101010101: 253; XOPAVX1: # %bb.0: 254; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 255; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 256; XOPAVX1-NEXT: retq 257; 258; XOPAVX2-LABEL: shuffle_v16i8_0101010101010101: 259; XOPAVX2: # %bb.0: 260; XOPAVX2-NEXT: vpbroadcastw %xmm0, %xmm0 261; XOPAVX2-NEXT: retq 262 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> 263 ret <16 x i8> %shuffle 264} 265 266define <16 x i8> @shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23(<16 x i8> %a, <16 x i8> %b) { 267; SSE-LABEL: shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23: 268; SSE: # 
%bb.0: 269; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 270; SSE-NEXT: retq 271; 272; AVX-LABEL: shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23: 273; AVX: # %bb.0: 274; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 275; AVX-NEXT: retq 276 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23> 277 ret <16 x i8> %shuffle 278} 279 280define <16 x i8> @shuffle_v16i8_08_24_09_25_10_26_11_27_12_28_13_29_14_30_15_31(<16 x i8> %a, <16 x i8> %b) { 281; SSE-LABEL: shuffle_v16i8_08_24_09_25_10_26_11_27_12_28_13_29_14_30_15_31: 282; SSE: # %bb.0: 283; SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] 284; SSE-NEXT: retq 285; 286; AVX-LABEL: shuffle_v16i8_08_24_09_25_10_26_11_27_12_28_13_29_14_30_15_31: 287; AVX: # %bb.0: 288; AVX-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] 289; AVX-NEXT: retq 290 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> 291 ret <16 x i8> %shuffle 292} 293 294define <16 x i8> @shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07(<16 x i8> %a, <16 x i8> %b) { 295; SSE-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07: 296; SSE: # %bb.0: 297; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 298; SSE-NEXT: pshuflw {{.*#+}} xmm1 = 
xmm1[0,0,0,0,4,5,6,7] 299; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 300; SSE-NEXT: movdqa %xmm1, %xmm0 301; SSE-NEXT: retq 302; 303; AVX1-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07: 304; AVX1: # %bb.0: 305; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 306; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] 307; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 308; AVX1-NEXT: retq 309; 310; AVX2OR512VL-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07: 311; AVX2OR512VL: # %bb.0: 312; AVX2OR512VL-NEXT: vpbroadcastb %xmm1, %xmm1 313; AVX2OR512VL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 314; AVX2OR512VL-NEXT: retq 315; 316; XOPAVX1-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07: 317; XOPAVX1: # %bb.0: 318; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[0],xmm0[1],xmm1[0],xmm0[2],xmm1[0],xmm0[3],xmm1[0],xmm0[4],xmm1[0],xmm0[5],xmm1[0],xmm0[6],xmm1[0],xmm0[7] 319; XOPAVX1-NEXT: retq 320; 321; XOPAVX2-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07: 322; XOPAVX2: # %bb.0: 323; XOPAVX2-NEXT: vpbroadcastb %xmm1, %xmm1 324; XOPAVX2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 325; XOPAVX2-NEXT: retq 326 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7> 327 ret <16 x i8> %shuffle 328} 329 330define <16 x i8> 
@shuffle_v16i8_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00(<16 x i8> %a, <16 x i8> %b) { 331; SSE2-LABEL: shuffle_v16i8_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00: 332; SSE2: # %bb.0: 333; SSE2-NEXT: pxor %xmm1, %xmm1 334; SSE2-NEXT: movdqa %xmm0, %xmm2 335; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] 336; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1] 337; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7] 338; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4] 339; SSE2-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] 340; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 341; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7] 342; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4] 343; SSE2-NEXT: packuswb %xmm2, %xmm0 344; SSE2-NEXT: retq 345; 346; SSSE3-LABEL: shuffle_v16i8_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00: 347; SSSE3: # %bb.0: 348; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0] 349; SSSE3-NEXT: retq 350; 351; SSE41-LABEL: shuffle_v16i8_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00: 352; SSE41: # %bb.0: 353; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0] 354; SSE41-NEXT: retq 355; 356; AVX-LABEL: shuffle_v16i8_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00: 357; AVX: # %bb.0: 358; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0] 359; AVX-NEXT: retq 360 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 361 ret <16 x i8> %shuffle 362} 363 364define <16 x i8> @shuffle_v16i8_07_06_05_04_03_02_01_00_15_14_13_12_11_10_09_08(<16 x i8> %a, <16 x i8> %b) { 
365; SSE2-LABEL: shuffle_v16i8_07_06_05_04_03_02_01_00_15_14_13_12_11_10_09_08: 366; SSE2: # %bb.0: 367; SSE2-NEXT: pxor %xmm1, %xmm1 368; SSE2-NEXT: movdqa %xmm0, %xmm2 369; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15] 370; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1] 371; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7] 372; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4] 373; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 374; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 375; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7] 376; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4] 377; SSE2-NEXT: packuswb %xmm2, %xmm0 378; SSE2-NEXT: retq 379; 380; SSSE3-LABEL: shuffle_v16i8_07_06_05_04_03_02_01_00_15_14_13_12_11_10_09_08: 381; SSSE3: # %bb.0: 382; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8] 383; SSSE3-NEXT: retq 384; 385; SSE41-LABEL: shuffle_v16i8_07_06_05_04_03_02_01_00_15_14_13_12_11_10_09_08: 386; SSE41: # %bb.0: 387; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8] 388; SSE41-NEXT: retq 389; 390; AVX-LABEL: shuffle_v16i8_07_06_05_04_03_02_01_00_15_14_13_12_11_10_09_08: 391; AVX: # %bb.0: 392; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8] 393; AVX-NEXT: retq 394 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8> 395 ret <16 x i8> %shuffle 396} 397 398define <16 x i8> @shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12(<16 x i8> %a, <16 x i8> %b) { 399; SSE2-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12: 400; SSE2: # 
%bb.0: 401; SSE2-NEXT: pxor %xmm1, %xmm1 402; SSE2-NEXT: movdqa %xmm0, %xmm2 403; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15] 404; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7] 405; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4] 406; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 407; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7] 408; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4] 409; SSE2-NEXT: packuswb %xmm2, %xmm0 410; SSE2-NEXT: retq 411; 412; SSSE3-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12: 413; SSSE3: # %bb.0: 414; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] 415; SSSE3-NEXT: retq 416; 417; SSE41-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12: 418; SSE41: # %bb.0: 419; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] 420; SSE41-NEXT: retq 421; 422; AVX-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12: 423; AVX: # %bb.0: 424; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] 425; AVX-NEXT: retq 426 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12> 427 ret <16 x i8> %shuffle 428} 429 430define <16 x i8> @shuffle_v16i8_01_00_03_02_05_04_07_06_09_08_11_10_13_12_15_14(<16 x i8> %a, <16 x i8> %b) { 431; SSE2-LABEL: shuffle_v16i8_01_00_03_02_05_04_07_06_09_08_11_10_13_12_15_14: 432; SSE2: # %bb.0: 433; SSE2-NEXT: movdqa %xmm0, %xmm1 434; SSE2-NEXT: psrlw $8, %xmm1 435; SSE2-NEXT: psllw $8, %xmm0 436; SSE2-NEXT: por %xmm1, %xmm0 437; SSE2-NEXT: retq 438; 439; SSSE3-LABEL: 
shuffle_v16i8_01_00_03_02_05_04_07_06_09_08_11_10_13_12_15_14: 440; SSSE3: # %bb.0: 441; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14] 442; SSSE3-NEXT: retq 443; 444; SSE41-LABEL: shuffle_v16i8_01_00_03_02_05_04_07_06_09_08_11_10_13_12_15_14: 445; SSE41: # %bb.0: 446; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14] 447; SSE41-NEXT: retq 448; 449; AVX1-LABEL: shuffle_v16i8_01_00_03_02_05_04_07_06_09_08_11_10_13_12_15_14: 450; AVX1: # %bb.0: 451; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14] 452; AVX1-NEXT: retq 453; 454; AVX2OR512VL-LABEL: shuffle_v16i8_01_00_03_02_05_04_07_06_09_08_11_10_13_12_15_14: 455; AVX2OR512VL: # %bb.0: 456; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14] 457; AVX2OR512VL-NEXT: retq 458; 459; XOP-LABEL: shuffle_v16i8_01_00_03_02_05_04_07_06_09_08_11_10_13_12_15_14: 460; XOP: # %bb.0: 461; XOP-NEXT: vprotw $8, %xmm0, %xmm0 462; XOP-NEXT: retq 463 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14> 464 ret <16 x i8> %shuffle 465} 466 467define <16 x i8> @shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20(<16 x i8> %a, <16 x i8> %b) { 468; SSE2-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20: 469; SSE2: # %bb.0: 470; SSE2-NEXT: pxor %xmm2, %xmm2 471; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 472; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7] 473; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,6,5,4] 474; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 475; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = 
xmm0[3,2,1,0,4,5,6,7] 476; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4] 477; SSE2-NEXT: packuswb %xmm1, %xmm0 478; SSE2-NEXT: retq 479; 480; SSSE3-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20: 481; SSSE3: # %bb.0: 482; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 483; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,4,2,0,14,12,10,8,7,5,3,1,15,13,11,9] 484; SSSE3-NEXT: retq 485; 486; SSE41-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20: 487; SSE41: # %bb.0: 488; SSE41-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 489; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,4,2,0,14,12,10,8,7,5,3,1,15,13,11,9] 490; SSE41-NEXT: retq 491; 492; AVX1-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20: 493; AVX1: # %bb.0: 494; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 495; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,4,2,0,14,12,10,8,7,5,3,1,15,13,11,9] 496; AVX1-NEXT: retq 497; 498; AVX2-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20: 499; AVX2: # %bb.0: 500; AVX2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 501; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,4,2,0,14,12,10,8,7,5,3,1,15,13,11,9] 502; AVX2-NEXT: retq 503; 504; AVX512VLBW-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20: 505; AVX512VLBW: # %bb.0: 506; AVX512VLBW-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 507; AVX512VLBW-NEXT: vpshufb 
{{.*#+}} xmm0 = xmm0[6,4,2,0,14,12,10,8,7,5,3,1,15,13,11,9] 508; AVX512VLBW-NEXT: retq 509; 510; AVX512VLVBMI-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20: 511; AVX512VLVBMI: # %bb.0: 512; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} xmm2 = [3,2,1,0,7,6,5,4,19,18,17,16,23,22,21,20] 513; AVX512VLVBMI-NEXT: vpermt2b %xmm1, %xmm2, %xmm0 514; AVX512VLVBMI-NEXT: retq 515; 516; XOP-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20: 517; XOP: # %bb.0: 518; XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4],xmm1[3,2,1,0,7,6,5,4] 519; XOP-NEXT: retq 520 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 19, i32 18, i32 17, i32 16, i32 23, i32 22, i32 21, i32 20> 521 ret <16 x i8> %shuffle 522} 523 524define <16 x i8> @shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20(<16 x i8> %a, <16 x i8> %b) { 525; SSE2-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20: 526; SSE2: # %bb.0: 527; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] 528; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 529; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 530; SSE2-NEXT: pxor %xmm1, %xmm1 531; SSE2-NEXT: movdqa %xmm0, %xmm2 532; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15] 533; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm2[3,2,1,0,4,5,6,7] 534; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 535; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,7,6,5,4] 536; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm3[0],xmm1[1] 537; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm0[3,2,1,0,4,5,6,7] 538; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm2[0,1,2,3,7,6,5,4] 539; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm3[0],xmm0[1] 540; SSE2-NEXT: 
packuswb %xmm1, %xmm0 541; SSE2-NEXT: retq 542; 543; SSSE3-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20: 544; SSSE3: # %bb.0: 545; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[15,14,13,12,7,6,5,4,u,u,u,u,u,u,u,u] 546; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,11,10,9,8,u,u,u,u,u,u,u,u] 547; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 548; SSSE3-NEXT: retq 549; 550; SSE41-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20: 551; SSE41: # %bb.0: 552; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[15,14,13,12,7,6,5,4,u,u,u,u,u,u,u,u] 553; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,11,10,9,8,u,u,u,u,u,u,u,u] 554; SSE41-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 555; SSE41-NEXT: retq 556; 557; AVX1-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20: 558; AVX1: # %bb.0: 559; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,14,13,12,7,6,5,4,u,u,u,u,u,u,u,u] 560; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,11,10,9,8,u,u,u,u,u,u,u,u] 561; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 562; AVX1-NEXT: retq 563; 564; AVX2-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20: 565; AVX2: # %bb.0: 566; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,14,13,12,7,6,5,4,u,u,u,u,u,u,u,u] 567; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,11,10,9,8,u,u,u,u,u,u,u,u] 568; AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 569; AVX2-NEXT: retq 570; 571; AVX512VLBW-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20: 572; AVX512VLBW: # %bb.0: 573; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,14,13,12,7,6,5,4,u,u,u,u,u,u,u,u] 574; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,11,10,9,8,u,u,u,u,u,u,u,u] 575; AVX512VLBW-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 576; AVX512VLBW-NEXT: retq 577; 578; AVX512VLVBMI-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20: 579; 
AVX512VLVBMI: # %bb.0: 580; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} xmm2 = [3,2,1,0,31,30,29,28,11,10,9,8,23,22,21,20] 581; AVX512VLVBMI-NEXT: vpermt2b %xmm1, %xmm2, %xmm0 582; AVX512VLVBMI-NEXT: retq 583; 584; XOP-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20: 585; XOP: # %bb.0: 586; XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm0[3,2,1,0],xmm1[15,14,13,12],xmm0[11,10,9,8],xmm1[7,6,5,4] 587; XOP-NEXT: retq 588 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 31, i32 30, i32 29, i32 28, i32 11, i32 10, i32 9, i32 8, i32 23, i32 22, i32 21, i32 20> 589 ret <16 x i8> %shuffle 590} 591 592define <16 x i8> @shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31(<16 x i8> %a, <16 x i8> %b) { 593; SSE2-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31: 594; SSE2: # %bb.0: 595; SSE2-NEXT: movaps {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] 596; SSE2-NEXT: andps %xmm2, %xmm0 597; SSE2-NEXT: andnps %xmm1, %xmm2 598; SSE2-NEXT: orps %xmm2, %xmm0 599; SSE2-NEXT: retq 600; 601; SSSE3-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31: 602; SSSE3: # %bb.0: 603; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u] 604; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] 605; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 606; SSSE3-NEXT: retq 607; 608; SSE41-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31: 609; SSE41: # %bb.0: 610; SSE41-NEXT: movdqa %xmm0, %xmm2 611; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] 612; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 613; SSE41-NEXT: movdqa %xmm1, %xmm0 614; SSE41-NEXT: retq 615; 616; AVX1-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31: 617; AVX1: # %bb.0: 618; AVX1-NEXT: 
vbroadcastss {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] 619; AVX1-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 620; AVX1-NEXT: retq 621; 622; AVX2-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31: 623; AVX2: # %bb.0: 624; AVX2-NEXT: vpbroadcastw {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] 625; AVX2-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 626; AVX2-NEXT: retq 627; 628; AVX512VL-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31: 629; AVX512VL: # %bb.0: 630; AVX512VL-NEXT: movw $-21846, %ax # imm = 0xAAAA 631; AVX512VL-NEXT: kmovd %eax, %k1 632; AVX512VL-NEXT: vmovdqu8 %xmm1, %xmm0 {%k1} 633; AVX512VL-NEXT: retq 634; 635; XOPAVX1-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31: 636; XOPAVX1: # %bb.0: 637; XOPAVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] 638; XOPAVX1-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 639; XOPAVX1-NEXT: retq 640; 641; XOPAVX2-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31: 642; XOPAVX2: # %bb.0: 643; XOPAVX2-NEXT: vpbroadcastw {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] 644; XOPAVX2-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 645; XOPAVX2-NEXT: retq 646 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31> 647 ret <16 x i8> %shuffle 648} 649 650define <16 x i8> @shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31(<16 x i8> %a, <16 x i8> %b) { 651; SSE2-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31: 652; SSE2: # %bb.0: 653; SSE2-NEXT: movaps {{.*#+}} xmm2 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0] 654; SSE2-NEXT: andps %xmm2, %xmm0 655; SSE2-NEXT: andnps %xmm1, %xmm2 656; SSE2-NEXT: orps %xmm2, %xmm0 657; SSE2-NEXT: retq 658; 659; SSSE3-LABEL: 
shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31: 660; SSSE3: # %bb.0: 661; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[15] 662; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2],zero,xmm0[4,5,6],zero,xmm0[8,9,10],zero,xmm0[12,13,14],zero 663; SSSE3-NEXT: por %xmm1, %xmm0 664; SSSE3-NEXT: retq 665; 666; SSE41-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31: 667; SSE41: # %bb.0: 668; SSE41-NEXT: movdqa %xmm0, %xmm2 669; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0] 670; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 671; SSE41-NEXT: movdqa %xmm1, %xmm0 672; SSE41-NEXT: retq 673; 674; AVX1-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31: 675; AVX1: # %bb.0: 676; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0] 677; AVX1-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 678; AVX1-NEXT: retq 679; 680; AVX2-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31: 681; AVX2: # %bb.0: 682; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0] 683; AVX2-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 684; AVX2-NEXT: retq 685; 686; AVX512VL-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31: 687; AVX512VL: # %bb.0: 688; AVX512VL-NEXT: movw $-30584, %ax # imm = 0x8888 689; AVX512VL-NEXT: kmovd %eax, %k1 690; AVX512VL-NEXT: vmovdqu8 %xmm1, %xmm0 {%k1} 691; AVX512VL-NEXT: retq 692; 693; XOPAVX1-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31: 694; XOPAVX1: # %bb.0: 695; XOPAVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0] 696; XOPAVX1-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 697; XOPAVX1-NEXT: retq 698; 699; XOPAVX2-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31: 700; XOPAVX2: # %bb.0: 701; XOPAVX2-NEXT: 
vpbroadcastd {{.*#+}} xmm2 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0] 702; XOPAVX2-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 703; XOPAVX2-NEXT: retq 704 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 4, i32 5, i32 6, i32 23, i32 8, i32 9, i32 10, i32 27, i32 12, i32 13, i32 14, i32 31> 705 ret <16 x i8> %shuffle 706} 707 708define <16 x i8> @shuffle_v16i8_00_01_02_zz_04_05_06_zz_08_09_10_zz_12_13_14_zz(<16 x i8> %a) { 709; SSE-LABEL: shuffle_v16i8_00_01_02_zz_04_05_06_zz_08_09_10_zz_12_13_14_zz: 710; SSE: # %bb.0: 711; SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 712; SSE-NEXT: retq 713; 714; AVX1OR2-LABEL: shuffle_v16i8_00_01_02_zz_04_05_06_zz_08_09_10_zz_12_13_14_zz: 715; AVX1OR2: # %bb.0: 716; AVX1OR2-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 717; AVX1OR2-NEXT: retq 718; 719; AVX512VL-LABEL: shuffle_v16i8_00_01_02_zz_04_05_06_zz_08_09_10_zz_12_13_14_zz: 720; AVX512VL: # %bb.0: 721; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 722; AVX512VL-NEXT: retq 723 %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 4, i32 5, i32 6, i32 23, i32 8, i32 9, i32 10, i32 27, i32 12, i32 13, i32 14, i32 31> 724 ret <16 x i8> %shuffle 725} 726 727define <16 x i8> @shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31(<16 x i8> %a, <16 x i8> %b) { 728; SSE2-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31: 729; SSE2: # %bb.0: 730; SSE2-NEXT: movaps {{.*#+}} xmm2 = [255,255,255,255,0,255,255,0,255,255,255,255,0,255,255,0] 731; SSE2-NEXT: andps %xmm2, %xmm0 732; SSE2-NEXT: andnps %xmm1, %xmm2 733; SSE2-NEXT: orps %xmm2, %xmm0 734; SSE2-NEXT: retq 735; 736; SSSE3-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31: 737; SSSE3: # %bb.0: 738; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = 
zero,zero,zero,zero,xmm1[4],zero,zero,xmm1[7],zero,zero,zero,zero,xmm1[12],zero,zero,xmm1[15] 739; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3],zero,xmm0[5,6],zero,xmm0[8,9,10,11],zero,xmm0[13,14],zero 740; SSSE3-NEXT: por %xmm1, %xmm0 741; SSSE3-NEXT: retq 742; 743; SSE41-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31: 744; SSE41: # %bb.0: 745; SSE41-NEXT: movdqa %xmm0, %xmm2 746; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,255,255,255,0,255,255,0,255,255,255,255,0,255,255,0] 747; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 748; SSE41-NEXT: movdqa %xmm1, %xmm0 749; SSE41-NEXT: retq 750; 751; AVX1-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31: 752; AVX1: # %bb.0: 753; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [255,255,255,255,0,255,255,0,255,255,255,255,0,255,255,0] 754; AVX1-NEXT: # xmm2 = mem[0,0] 755; AVX1-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 756; AVX1-NEXT: retq 757; 758; AVX2-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31: 759; AVX2: # %bb.0: 760; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [255,255,255,255,0,255,255,0,255,255,255,255,0,255,255,0] 761; AVX2-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 762; AVX2-NEXT: retq 763; 764; AVX512VL-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31: 765; AVX512VL: # %bb.0: 766; AVX512VL-NEXT: movw $-28528, %ax # imm = 0x9090 767; AVX512VL-NEXT: kmovd %eax, %k1 768; AVX512VL-NEXT: vmovdqu8 %xmm1, %xmm0 {%k1} 769; AVX512VL-NEXT: retq 770; 771; XOPAVX1-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31: 772; XOPAVX1: # %bb.0: 773; XOPAVX1-NEXT: vmovddup {{.*#+}} xmm2 = [255,255,255,255,0,255,255,0,255,255,255,255,0,255,255,0] 774; XOPAVX1-NEXT: # xmm2 = mem[0,0] 775; XOPAVX1-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 776; XOPAVX1-NEXT: retq 777; 778; XOPAVX2-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31: 779; XOPAVX2: # %bb.0: 780; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = 
[255,255,255,255,0,255,255,0,255,255,255,255,0,255,255,0] 781; XOPAVX2-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 782; XOPAVX2-NEXT: retq 783 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 5, i32 6, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 13, i32 14, i32 31> 784 ret <16 x i8> %shuffle 785} 786 787define <16 x i8> @shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15(<16 x i8> %a, <16 x i8> %b) { 788; SSE2-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15: 789; SSE2: # %bb.0: 790; SSE2-NEXT: movaps {{.*#+}} xmm2 = [255,255,255,255,0,0,0,0,255,255,0,0,255,0,255,0] 791; SSE2-NEXT: andps %xmm2, %xmm1 792; SSE2-NEXT: andnps %xmm0, %xmm2 793; SSE2-NEXT: orps %xmm1, %xmm2 794; SSE2-NEXT: movaps %xmm2, %xmm0 795; SSE2-NEXT: retq 796; 797; SSSE3-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15: 798; SSSE3: # %bb.0: 799; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[4,5,6,7],zero,zero,xmm0[10,11],zero,xmm0[13],zero,xmm0[15] 800; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,2,3],zero,zero,zero,zero,xmm1[8,9],zero,zero,xmm1[12],zero,xmm1[14],zero 801; SSSE3-NEXT: por %xmm1, %xmm0 802; SSSE3-NEXT: retq 803; 804; SSE41-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15: 805; SSE41: # %bb.0: 806; SSE41-NEXT: movdqa %xmm0, %xmm2 807; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,255,255,255,0,0,0,0,255,255,0,0,255,0,255,0] 808; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2 809; SSE41-NEXT: movdqa %xmm2, %xmm0 810; SSE41-NEXT: retq 811; 812; AVX1OR2-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15: 813; AVX1OR2: # %bb.0: 814; AVX1OR2-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,255,255,0,0,0,0,255,255,0,0,255,0,255,0] 815; AVX1OR2-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 816; AVX1OR2-NEXT: retq 817; 818; AVX512VL-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15: 819; AVX512VL: # %bb.0: 820; AVX512VL-NEXT: movw 
$-21264, %ax # imm = 0xACF0 821; AVX512VL-NEXT: kmovd %eax, %k1 822; AVX512VL-NEXT: vpblendmb %xmm0, %xmm1, %xmm0 {%k1} 823; AVX512VL-NEXT: retq 824 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 10, i32 11, i32 28, i32 13, i32 30, i32 15> 825 ret <16 x i8> %shuffle 826} 827 828define <16 x i8> @shuffle_v16i8_02_20_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %a, <16 x i8> %b) { 829; SSE2-LABEL: shuffle_v16i8_02_20_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu: 830; SSE2: # %bb.0: 831; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,1,1] 832; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 833; SSE2-NEXT: psrlq $16, %xmm0 834; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 835; SSE2-NEXT: packuswb %xmm0, %xmm0 836; SSE2-NEXT: retq 837; 838; SSSE3-LABEL: shuffle_v16i8_02_20_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu: 839; SSSE3: # %bb.0: 840; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 841; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,9,u,u,u,u,u,u,u,u,u,u,u,u,u,u] 842; SSSE3-NEXT: retq 843; 844; SSE41-LABEL: shuffle_v16i8_02_20_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu: 845; SSE41: # %bb.0: 846; SSE41-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 847; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,9,u,u,u,u,u,u,u,u,u,u,u,u,u,u] 848; SSE41-NEXT: retq 849; 850; AVX1-LABEL: shuffle_v16i8_02_20_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu: 851; AVX1: # %bb.0: 852; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 853; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,9,u,u,u,u,u,u,u,u,u,u,u,u,u,u] 854; AVX1-NEXT: retq 
855; 856; AVX2-LABEL: shuffle_v16i8_02_20_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu: 857; AVX2: # %bb.0: 858; AVX2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 859; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,9,u,u,u,u,u,u,u,u,u,u,u,u,u,u] 860; AVX2-NEXT: retq 861; 862; AVX512VLBW-LABEL: shuffle_v16i8_02_20_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu: 863; AVX512VLBW: # %bb.0: 864; AVX512VLBW-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 865; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,9,u,u,u,u,u,u,u,u,u,u,u,u,u,u] 866; AVX512VLBW-NEXT: retq 867; 868; AVX512VLVBMI-LABEL: shuffle_v16i8_02_20_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu: 869; AVX512VLVBMI: # %bb.0: 870; AVX512VLVBMI-NEXT: vpbroadcastw {{.*#+}} xmm2 = [2,20,2,20,2,20,2,20,2,20,2,20,2,20,2,20] 871; AVX512VLVBMI-NEXT: vpermt2b %xmm1, %xmm2, %xmm0 872; AVX512VLVBMI-NEXT: retq 873; 874; XOP-LABEL: shuffle_v16i8_02_20_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu: 875; XOP: # %bb.0: 876; XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm0[2],xmm1[4],xmm0[u,u,u,u,u,u,u,u,u,u,u,u,u,u] 877; XOP-NEXT: retq 878 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 2, i32 20, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 879 ret <16 x i8> %shuffle 880} 881 882; PR39387 883define <16 x i8> @shuffle_v16i8_5_6_7_8_9_10_27_28_29_30_31_0_1_2_3_4(<16 x i8> %a, <16 x i8> %b) { 884; SSE2-LABEL: shuffle_v16i8_5_6_7_8_9_10_27_28_29_30_31_0_1_2_3_4: 885; SSE2: # %bb.0: 886; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,255] 887; SSE2-NEXT: movdqa %xmm0, %xmm3 888; SSE2-NEXT: pand %xmm2, %xmm3 889; SSE2-NEXT: pandn %xmm1, %xmm2 890; 
SSE2-NEXT: por %xmm3, %xmm2 891; SSE2-NEXT: pxor %xmm1, %xmm1 892; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15] 893; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 894; SSE2-NEXT: movdqa %xmm0, %xmm1 895; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [65535,65535,65535,65535,65535,0,0,65535] 896; SSE2-NEXT: pand %xmm3, %xmm0 897; SSE2-NEXT: pandn %xmm2, %xmm3 898; SSE2-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7,8,9] 899; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 900; SSE2-NEXT: por %xmm2, %xmm1 901; SSE2-NEXT: por %xmm0, %xmm3 902; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[3,1,2,0] 903; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,5,7] 904; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1] 905; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,0,0,3,4,5,6,7] 906; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,7,4] 907; SSE2-NEXT: packuswb %xmm0, %xmm1 908; SSE2-NEXT: movdqa %xmm1, %xmm0 909; SSE2-NEXT: retq 910; 911; SSSE3-LABEL: shuffle_v16i8_5_6_7_8_9_10_27_28_29_30_31_0_1_2_3_4: 912; SSSE3: # %bb.0: 913; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10] 914; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,3,6,6,7,8,9] 915; SSSE3-NEXT: retq 916; 917; SSE41-LABEL: shuffle_v16i8_5_6_7_8_9_10_27_28_29_30_31_0_1_2_3_4: 918; SSE41: # %bb.0: 919; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10] 920; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,3,6,6,7,8,9] 921; SSE41-NEXT: retq 922; 923; AVX1-LABEL: shuffle_v16i8_5_6_7_8_9_10_27_28_29_30_31_0_1_2_3_4: 924; AVX1: # %bb.0: 925; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = 
xmm1[11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10] 926; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,3,6,6,7,8,9] 927; AVX1-NEXT: retq 928; 929; AVX2-LABEL: shuffle_v16i8_5_6_7_8_9_10_27_28_29_30_31_0_1_2_3_4: 930; AVX2: # %bb.0: 931; AVX2-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10] 932; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,3,6,6,7,8,9] 933; AVX2-NEXT: retq 934; 935; AVX512VLBW-LABEL: shuffle_v16i8_5_6_7_8_9_10_27_28_29_30_31_0_1_2_3_4: 936; AVX512VLBW: # %bb.0: 937; AVX512VLBW-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10] 938; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,3,6,6,7,8,9] 939; AVX512VLBW-NEXT: retq 940; 941; AVX512VLVBMI-LABEL: shuffle_v16i8_5_6_7_8_9_10_27_28_29_30_31_0_1_2_3_4: 942; AVX512VLVBMI: # %bb.0: 943; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} xmm2 = [5,6,7,8,9,10,27,28,29,30,30,1,1,2,3,4] 944; AVX512VLVBMI-NEXT: vpermt2b %xmm1, %xmm2, %xmm0 945; AVX512VLVBMI-NEXT: retq 946; 947; XOP-LABEL: shuffle_v16i8_5_6_7_8_9_10_27_28_29_30_31_0_1_2_3_4: 948; XOP: # %bb.0: 949; XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10],xmm1[11,12,13,14,14],xmm0[1,1,2,3,4] 950; XOP-NEXT: retq 951 %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 27, i32 28, i32 29, i32 30, i32 30, i32 1, i32 1, i32 2, i32 3, i32 4> 952 ret <16 x i8> %1 953} 954 955define <16 x i8> @shuffle_v16i8_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30(<16 x i8> %a, <16 x i8> %b) { 956; SSE2-LABEL: shuffle_v16i8_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30: 957; SSE2: # %bb.0: 958; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] 959; SSE2-NEXT: pand %xmm2, %xmm1 960; SSE2-NEXT: pand %xmm2, %xmm0 961; SSE2-NEXT: packuswb %xmm1, %xmm0 962; SSE2-NEXT: retq 963; 964; SSSE3-LABEL: shuffle_v16i8_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30: 965; SSSE3: # %bb.0: 966; 
SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] 967; SSSE3-NEXT: pand %xmm2, %xmm1 968; SSSE3-NEXT: pand %xmm2, %xmm0 969; SSSE3-NEXT: packuswb %xmm1, %xmm0 970; SSSE3-NEXT: retq 971; 972; SSE41-LABEL: shuffle_v16i8_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30: 973; SSE41: # %bb.0: 974; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] 975; SSE41-NEXT: pand %xmm2, %xmm1 976; SSE41-NEXT: pand %xmm2, %xmm0 977; SSE41-NEXT: packuswb %xmm1, %xmm0 978; SSE41-NEXT: retq 979; 980; AVX1-LABEL: shuffle_v16i8_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30: 981; AVX1: # %bb.0: 982; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] 983; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1 984; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 985; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 986; AVX1-NEXT: retq 987; 988; AVX2-LABEL: shuffle_v16i8_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30: 989; AVX2: # %bb.0: 990; AVX2-NEXT: vpbroadcastw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] 991; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 992; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0 993; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 994; AVX2-NEXT: retq 995; 996; AVX512VL-LABEL: shuffle_v16i8_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30: 997; AVX512VL: # %bb.0: 998; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 999; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 1000; AVX512VL-NEXT: vpmovwb %ymm0, %xmm0 1001; AVX512VL-NEXT: vzeroupper 1002; AVX512VL-NEXT: retq 1003; 1004; XOP-LABEL: shuffle_v16i8_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30: 1005; XOP: # %bb.0: 1006; XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],xmm1[0,2,4,6,8,10,12,14] 1007; XOP-NEXT: retq 1008 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> 1009 ret <16 x i8> %shuffle 1010} 1011 1012define <16 x i8> 
@shuffle_v16i8_01_03_05_07_09_11_13_15_17_19_21_23_25_27_29_31(<16 x i8> %a, <16 x i8> %b) { 1013; SSE2-LABEL: shuffle_v16i8_01_03_05_07_09_11_13_15_17_19_21_23_25_27_29_31: 1014; SSE2: # %bb.0: 1015; SSE2-NEXT: psrlw $8, %xmm1 1016; SSE2-NEXT: psrlw $8, %xmm0 1017; SSE2-NEXT: packuswb %xmm1, %xmm0 1018; SSE2-NEXT: retq 1019; 1020; SSSE3-LABEL: shuffle_v16i8_01_03_05_07_09_11_13_15_17_19_21_23_25_27_29_31: 1021; SSSE3: # %bb.0: 1022; SSSE3-NEXT: movq {{.*#+}} xmm2 = [1,3,5,7,9,11,13,15,0,0,0,0,0,0,0,0] 1023; SSSE3-NEXT: pshufb %xmm2, %xmm1 1024; SSSE3-NEXT: pshufb %xmm2, %xmm0 1025; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1026; SSSE3-NEXT: retq 1027; 1028; SSE41-LABEL: shuffle_v16i8_01_03_05_07_09_11_13_15_17_19_21_23_25_27_29_31: 1029; SSE41: # %bb.0: 1030; SSE41-NEXT: movq {{.*#+}} xmm2 = [1,3,5,7,9,11,13,15,0,0,0,0,0,0,0,0] 1031; SSE41-NEXT: pshufb %xmm2, %xmm1 1032; SSE41-NEXT: pshufb %xmm2, %xmm0 1033; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1034; SSE41-NEXT: retq 1035; 1036; AVX1-LABEL: shuffle_v16i8_01_03_05_07_09_11_13_15_17_19_21_23_25_27_29_31: 1037; AVX1: # %bb.0: 1038; AVX1-NEXT: vmovq {{.*#+}} xmm2 = [1,3,5,7,9,11,13,15,0,0,0,0,0,0,0,0] 1039; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 1040; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 1041; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1042; AVX1-NEXT: retq 1043; 1044; AVX2-LABEL: shuffle_v16i8_01_03_05_07_09_11_13_15_17_19_21_23_25_27_29_31: 1045; AVX2: # %bb.0: 1046; AVX2-NEXT: vmovq {{.*#+}} xmm2 = [1,3,5,7,9,11,13,15,0,0,0,0,0,0,0,0] 1047; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1 1048; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0 1049; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1050; AVX2-NEXT: retq 1051; 1052; AVX512VLBW-LABEL: shuffle_v16i8_01_03_05_07_09_11_13_15_17_19_21_23_25_27_29_31: 1053; AVX512VLBW: # %bb.0: 1054; AVX512VLBW-NEXT: vmovq {{.*#+}} xmm2 = [1,3,5,7,9,11,13,15,0,0,0,0,0,0,0,0] 1055; AVX512VLBW-NEXT: vpshufb %xmm2, %xmm1, %xmm1 1056; 
AVX512VLBW-NEXT: vpshufb %xmm2, %xmm0, %xmm0 1057; AVX512VLBW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1058; AVX512VLBW-NEXT: retq 1059; 1060; AVX512VLVBMI-LABEL: shuffle_v16i8_01_03_05_07_09_11_13_15_17_19_21_23_25_27_29_31: 1061; AVX512VLVBMI: # %bb.0: 1062; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} xmm2 = [1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31] 1063; AVX512VLVBMI-NEXT: vpermt2b %xmm1, %xmm2, %xmm0 1064; AVX512VLVBMI-NEXT: retq 1065; 1066; XOP-LABEL: shuffle_v16i8_01_03_05_07_09_11_13_15_17_19_21_23_25_27_29_31: 1067; XOP: # %bb.0: 1068; XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm0[1,3,5,7,9,11,13,15],xmm1[1,3,5,7,9,11,13,15] 1069; XOP-NEXT: retq 1070 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31> 1071 ret <16 x i8> %shuffle 1072} 1073 1074; PR27780 - https://bugs.llvm.org/show_bug.cgi?id=27780 1075 1076define <16 x i8> @load_fold_pblendvb(ptr %px, <16 x i8> %y) { 1077; SSE2-LABEL: load_fold_pblendvb: 1078; SSE2: # %bb.0: 1079; SSE2-NEXT: movaps {{.*#+}} xmm1 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255] 1080; SSE2-NEXT: andps %xmm1, %xmm0 1081; SSE2-NEXT: andnps (%rdi), %xmm1 1082; SSE2-NEXT: orps %xmm1, %xmm0 1083; SSE2-NEXT: retq 1084; 1085; SSSE3-LABEL: load_fold_pblendvb: 1086; SSSE3: # %bb.0: 1087; SSSE3-NEXT: movdqa (%rdi), %xmm1 1088; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,xmm0[3],zero,zero,zero,xmm0[7,8,9],zero,xmm0[11],zero,zero,zero,xmm0[15] 1089; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,xmm1[2],zero,xmm1[4,5,6],zero,zero,zero,xmm1[10],zero,xmm1[12,13,14],zero 1090; SSSE3-NEXT: por %xmm1, %xmm0 1091; SSSE3-NEXT: retq 1092; 1093; SSE41-LABEL: load_fold_pblendvb: 1094; SSE41: # %bb.0: 1095; SSE41-NEXT: movdqa %xmm0, %xmm1 1096; SSE41-NEXT: movaps {{.*#+}} xmm0 = [0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0] 1097; SSE41-NEXT: pblendvb %xmm0, (%rdi), %xmm1 1098; SSE41-NEXT: movdqa %xmm1, 
%xmm0 1099; SSE41-NEXT: retq 1100; 1101; AVX1-LABEL: load_fold_pblendvb: 1102; AVX1: # %bb.0: 1103; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0] 1104; AVX1-NEXT: # xmm1 = mem[0,0] 1105; AVX1-NEXT: vpblendvb %xmm1, (%rdi), %xmm0, %xmm0 1106; AVX1-NEXT: retq 1107; 1108; AVX2-LABEL: load_fold_pblendvb: 1109; AVX2: # %bb.0: 1110; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0] 1111; AVX2-NEXT: vpblendvb %xmm1, (%rdi), %xmm0, %xmm0 1112; AVX2-NEXT: retq 1113; 1114; AVX512VL-LABEL: load_fold_pblendvb: 1115; AVX512VL: # %bb.0: 1116; AVX512VL-NEXT: movw $29812, %ax # imm = 0x7474 1117; AVX512VL-NEXT: kmovd %eax, %k1 1118; AVX512VL-NEXT: vmovdqu8 (%rdi), %xmm0 {%k1} 1119; AVX512VL-NEXT: retq 1120; 1121; XOPAVX1-LABEL: load_fold_pblendvb: 1122; XOPAVX1: # %bb.0: 1123; XOPAVX1-NEXT: vmovddup {{.*#+}} xmm1 = [0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0] 1124; XOPAVX1-NEXT: # xmm1 = mem[0,0] 1125; XOPAVX1-NEXT: vpblendvb %xmm1, (%rdi), %xmm0, %xmm0 1126; XOPAVX1-NEXT: retq 1127; 1128; XOPAVX2-LABEL: load_fold_pblendvb: 1129; XOPAVX2: # %bb.0: 1130; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0] 1131; XOPAVX2-NEXT: vpblendvb %xmm1, (%rdi), %xmm0, %xmm0 1132; XOPAVX2-NEXT: retq 1133 %x = load <16 x i8>, ptr %px, align 16 1134 %select = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> <i32 16, i32 17, i32 2, i32 19, i32 4, i32 5, i32 6, i32 23, i32 24, i32 25, i32 10, i32 27, i32 12, i32 13, i32 14, i32 31> 1135 ret <16 x i8> %select 1136} 1137 1138define <16 x i8> @load_fold_pblendvb_commute(ptr %px, <16 x i8> %y) { 1139; SSE2-LABEL: load_fold_pblendvb_commute: 1140; SSE2: # %bb.0: 1141; SSE2-NEXT: movaps {{.*#+}} xmm1 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255] 1142; SSE2-NEXT: movaps %xmm1, %xmm2 1143; SSE2-NEXT: andnps %xmm0, %xmm2 1144; SSE2-NEXT: andps (%rdi), %xmm1 1145; SSE2-NEXT: orps %xmm2, %xmm1 1146; SSE2-NEXT: movaps %xmm1, 
%xmm0 1147; SSE2-NEXT: retq 1148; 1149; SSSE3-LABEL: load_fold_pblendvb_commute: 1150; SSSE3: # %bb.0: 1151; SSSE3-NEXT: movdqa (%rdi), %xmm1 1152; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,xmm0[2],zero,xmm0[4,5,6],zero,zero,zero,xmm0[10],zero,xmm0[12,13,14],zero 1153; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1],zero,xmm1[3],zero,zero,zero,xmm1[7,8,9],zero,xmm1[11],zero,zero,zero,xmm1[15] 1154; SSSE3-NEXT: por %xmm1, %xmm0 1155; SSSE3-NEXT: retq 1156; 1157; SSE41-LABEL: load_fold_pblendvb_commute: 1158; SSE41: # %bb.0: 1159; SSE41-NEXT: movdqa %xmm0, %xmm1 1160; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255] 1161; SSE41-NEXT: pblendvb %xmm0, (%rdi), %xmm1 1162; SSE41-NEXT: movdqa %xmm1, %xmm0 1163; SSE41-NEXT: retq 1164; 1165; AVX1-LABEL: load_fold_pblendvb_commute: 1166; AVX1: # %bb.0: 1167; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255] 1168; AVX1-NEXT: # xmm1 = mem[0,0] 1169; AVX1-NEXT: vpblendvb %xmm1, (%rdi), %xmm0, %xmm0 1170; AVX1-NEXT: retq 1171; 1172; AVX2-LABEL: load_fold_pblendvb_commute: 1173; AVX2: # %bb.0: 1174; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255] 1175; AVX2-NEXT: vpblendvb %xmm1, (%rdi), %xmm0, %xmm0 1176; AVX2-NEXT: retq 1177; 1178; AVX512VL-LABEL: load_fold_pblendvb_commute: 1179; AVX512VL: # %bb.0: 1180; AVX512VL-NEXT: vmovdqa (%rdi), %xmm1 1181; AVX512VL-NEXT: movw $29812, %ax # imm = 0x7474 1182; AVX512VL-NEXT: kmovd %eax, %k1 1183; AVX512VL-NEXT: vmovdqu8 %xmm0, %xmm1 {%k1} 1184; AVX512VL-NEXT: vmovdqa %xmm1, %xmm0 1185; AVX512VL-NEXT: retq 1186; 1187; XOPAVX1-LABEL: load_fold_pblendvb_commute: 1188; XOPAVX1: # %bb.0: 1189; XOPAVX1-NEXT: vmovddup {{.*#+}} xmm1 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255] 1190; XOPAVX1-NEXT: # xmm1 = mem[0,0] 1191; XOPAVX1-NEXT: vpblendvb %xmm1, (%rdi), %xmm0, %xmm0 1192; XOPAVX1-NEXT: retq 1193; 1194; XOPAVX2-LABEL: load_fold_pblendvb_commute: 1195; XOPAVX2: # %bb.0: 
1196; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255] 1197; XOPAVX2-NEXT: vpblendvb %xmm1, (%rdi), %xmm0, %xmm0 1198; XOPAVX2-NEXT: retq 1199 %x = load <16 x i8>, ptr %px, align 16 1200 %select = shufflevector <16 x i8> %y, <16 x i8> %x, <16 x i32> <i32 16, i32 17, i32 2, i32 19, i32 4, i32 5, i32 6, i32 23, i32 24, i32 25, i32 10, i32 27, i32 12, i32 13, i32 14, i32 31> 1201 ret <16 x i8> %select 1202} 1203 1204define <16 x i8> @trunc_v4i32_shuffle(<16 x i8> %a) { 1205; SSE2-LABEL: trunc_v4i32_shuffle: 1206; SSE2: # %bb.0: 1207; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1208; SSE2-NEXT: packuswb %xmm0, %xmm0 1209; SSE2-NEXT: packuswb %xmm0, %xmm0 1210; SSE2-NEXT: retq 1211; 1212; SSSE3-LABEL: trunc_v4i32_shuffle: 1213; SSSE3: # %bb.0: 1214; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] 1215; SSSE3-NEXT: retq 1216; 1217; SSE41-LABEL: trunc_v4i32_shuffle: 1218; SSE41: # %bb.0: 1219; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] 1220; SSE41-NEXT: retq 1221; 1222; AVX1OR2-LABEL: trunc_v4i32_shuffle: 1223; AVX1OR2: # %bb.0: 1224; AVX1OR2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] 1225; AVX1OR2-NEXT: retq 1226; 1227; AVX512VL-LABEL: trunc_v4i32_shuffle: 1228; AVX512VL: # %bb.0: 1229; AVX512VL-NEXT: vpmovdb %xmm0, %xmm0 1230; AVX512VL-NEXT: retq 1231 %shuffle = shufflevector <16 x i8> %a, <16 x i8> poison, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 1232 ret <16 x i8> %shuffle 1233} 1234 1235define <16 x i8> @stress_test0(<16 x i8> %s.0.1, <16 x i8> %s.0.2, <16 x i8> %s.0.3, <16 x i8> %s.0.4, <16 x i8> %s.0.5, <16 x i8> %s.0.6, <16 x i8> %s.0.7, <16 x i8> %s.0.8, <16 x i8> %s.0.9) { 1236; We don't have anything useful to check here. This generates 100s of 1237; instructions. 
Instead, just make sure we survived codegen. 1238; ALL-LABEL: stress_test0: 1239; ALL: retq 1240entry: 1241 %s.1.4 = shufflevector <16 x i8> %s.0.4, <16 x i8> %s.0.5, <16 x i32> <i32 1, i32 22, i32 21, i32 28, i32 3, i32 16, i32 6, i32 1, i32 19, i32 29, i32 12, i32 31, i32 2, i32 3, i32 3, i32 6> 1242 %s.1.5 = shufflevector <16 x i8> %s.0.5, <16 x i8> %s.0.6, <16 x i32> <i32 31, i32 20, i32 12, i32 19, i32 2, i32 15, i32 12, i32 31, i32 2, i32 28, i32 2, i32 30, i32 7, i32 8, i32 17, i32 28> 1243 %s.1.8 = shufflevector <16 x i8> %s.0.8, <16 x i8> %s.0.9, <16 x i32> <i32 14, i32 10, i32 17, i32 5, i32 17, i32 9, i32 17, i32 21, i32 31, i32 24, i32 16, i32 6, i32 20, i32 28, i32 23, i32 8> 1244 %s.2.2 = shufflevector <16 x i8> %s.0.3, <16 x i8> %s.0.4, <16 x i32> <i32 20, i32 9, i32 21, i32 11, i32 11, i32 4, i32 3, i32 18, i32 3, i32 30, i32 4, i32 31, i32 11, i32 24, i32 13, i32 29> 1245 %s.3.2 = shufflevector <16 x i8> %s.2.2, <16 x i8> %s.1.4, <16 x i32> <i32 15, i32 13, i32 5, i32 11, i32 7, i32 17, i32 14, i32 22, i32 22, i32 16, i32 7, i32 24, i32 16, i32 22, i32 7, i32 29> 1246 %s.5.4 = shufflevector <16 x i8> %s.1.5, <16 x i8> %s.1.8, <16 x i32> <i32 3, i32 13, i32 19, i32 7, i32 23, i32 11, i32 1, i32 9, i32 16, i32 25, i32 2, i32 7, i32 0, i32 21, i32 23, i32 17> 1247 %s.6.1 = shufflevector <16 x i8> %s.3.2, <16 x i8> %s.3.2, <16 x i32> <i32 11, i32 2, i32 28, i32 31, i32 27, i32 3, i32 9, i32 27, i32 25, i32 25, i32 14, i32 7, i32 12, i32 28, i32 12, i32 23> 1248 %s.7.1 = shufflevector <16 x i8> %s.6.1, <16 x i8> %s.3.2, <16 x i32> <i32 15, i32 29, i32 14, i32 0, i32 29, i32 15, i32 26, i32 30, i32 6, i32 7, i32 2, i32 8, i32 12, i32 10, i32 29, i32 17> 1249 %s.7.2 = shufflevector <16 x i8> %s.3.2, <16 x i8> %s.5.4, <16 x i32> <i32 3, i32 29, i32 3, i32 19, i32 poison, i32 20, i32 poison, i32 3, i32 27, i32 poison, i32 poison, i32 11, i32 poison, i32 poison, i32 poison, i32 poison> 1250 %s.16.0 = shufflevector <16 x i8> %s.7.1, <16 x i8> %s.7.2, <16 x 
i32> <i32 13, i32 1, i32 16, i32 16, i32 6, i32 7, i32 29, i32 18, i32 19, i32 28, i32 poison, i32 poison, i32 31, i32 1, i32 poison, i32 10> 1251 ret <16 x i8> %s.16.0 1252} 1253 1254define <16 x i8> @poison_test1(<16 x i8> %s.0.5, <16 x i8> %s.0.8, <16 x i8> %s.0.9) noinline nounwind { 1255; There is nothing interesting to check about these instructions other than 1256; that they survive codegen. However, we actually do better and delete all of 1257; them because the result is 'poison'. 1258; 1259; ALL-LABEL: poison_test1: 1260; ALL: # %bb.0: # %entry 1261; ALL-NEXT: retq 1262entry: 1263 %s.1.8 = shufflevector <16 x i8> %s.0.8, <16 x i8> poison, <16 x i32> <i32 9, i32 9, i32 poison, i32 poison, i32 poison, i32 2, i32 poison, i32 6, i32 poison, i32 6, i32 poison, i32 14, i32 14, i32 poison, i32 poison, i32 0> 1264 %s.2.4 = shufflevector <16 x i8> poison, <16 x i8> %s.0.5, <16 x i32> <i32 21, i32 poison, i32 poison, i32 19, i32 poison, i32 poison, i32 29, i32 24, i32 21, i32 23, i32 21, i32 17, i32 19, i32 poison, i32 20, i32 22> 1265 %s.2.5 = shufflevector <16 x i8> %s.0.5, <16 x i8> poison, <16 x i32> <i32 3, i32 8, i32 poison, i32 7, i32 poison, i32 10, i32 8, i32 0, i32 15, i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 9> 1266 %s.2.9 = shufflevector <16 x i8> %s.0.9, <16 x i8> poison, <16 x i32> <i32 7, i32 poison, i32 14, i32 7, i32 8, i32 poison, i32 7, i32 8, i32 5, i32 15, i32 poison, i32 1, i32 11, i32 poison, i32 poison, i32 11> 1267 %s.3.4 = shufflevector <16 x i8> %s.2.4, <16 x i8> %s.0.5, <16 x i32> <i32 5, i32 0, i32 21, i32 6, i32 15, i32 27, i32 22, i32 21, i32 4, i32 22, i32 19, i32 26, i32 9, i32 26, i32 8, i32 29> 1268 %s.3.9 = shufflevector <16 x i8> %s.2.9, <16 x i8> poison, <16 x i32> <i32 8, i32 6, i32 8, i32 1, i32 poison, i32 4, i32 poison, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 6, i32 poison> 1269 %s.4.7 = shufflevector <16 x i8> %s.1.8, <16 x i8> %s.2.9, <16 x i32> <i32 
9, i32 0, i32 22, i32 20, i32 24, i32 7, i32 21, i32 17, i32 20, i32 12, i32 19, i32 23, i32 2, i32 9, i32 17, i32 10> 1270 %s.4.8 = shufflevector <16 x i8> %s.2.9, <16 x i8> %s.3.9, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 6, i32 10, i32 poison, i32 0, i32 5, i32 poison, i32 9, i32 poison> 1271 %s.5.7 = shufflevector <16 x i8> %s.4.7, <16 x i8> %s.4.8, <16 x i32> <i32 16, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 1272 %s.8.4 = shufflevector <16 x i8> %s.3.4, <16 x i8> %s.5.7, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 28, i32 poison, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 1273 %s.9.4 = shufflevector <16 x i8> %s.8.4, <16 x i8> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 10, i32 5> 1274 %s.10.4 = shufflevector <16 x i8> %s.9.4, <16 x i8> poison, <16 x i32> <i32 poison, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 1275 %s.12.4 = shufflevector <16 x i8> %s.10.4, <16 x i8> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 13, i32 poison, i32 poison, i32 poison> 1276 1277 ret <16 x i8> %s.12.4 1278} 1279 1280define <16 x i8> @PR20540(<8 x i8> %a) { 1281; SSE-LABEL: PR20540: 1282; SSE: # %bb.0: 1283; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero 1284; SSE-NEXT: retq 1285; 1286; AVX-LABEL: PR20540: 1287; AVX: # %bb.0: 1288; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 1289; AVX-NEXT: retq 1290 
%shuffle = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> 1291 ret <16 x i8> %shuffle 1292} 1293 1294define <16 x i8> @shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) { 1295; SSE-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 1296; SSE: # %bb.0: 1297; SSE-NEXT: movzbl %dil, %eax 1298; SSE-NEXT: movd %eax, %xmm0 1299; SSE-NEXT: retq 1300; 1301; AVX-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 1302; AVX: # %bb.0: 1303; AVX-NEXT: movzbl %dil, %eax 1304; AVX-NEXT: vmovd %eax, %xmm0 1305; AVX-NEXT: retq 1306 %a = insertelement <16 x i8> poison, i8 %i, i32 0 1307 %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1308 ret <16 x i8> %shuffle 1309} 1310 1311define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) { 1312; SSE2-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 1313; SSE2: # %bb.0: 1314; SSE2-NEXT: shll $8, %edi 1315; SSE2-NEXT: pxor %xmm0, %xmm0 1316; SSE2-NEXT: pinsrw $2, %edi, %xmm0 1317; SSE2-NEXT: retq 1318; 1319; SSSE3-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 1320; SSSE3: # %bb.0: 1321; SSSE3-NEXT: shll $8, %edi 1322; SSSE3-NEXT: pxor %xmm0, %xmm0 1323; SSSE3-NEXT: pinsrw $2, %edi, %xmm0 1324; SSSE3-NEXT: retq 1325; 1326; SSE41-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 1327; SSE41: # %bb.0: 1328; SSE41-NEXT: pxor %xmm0, %xmm0 1329; SSE41-NEXT: pinsrb $5, %edi, %xmm0 1330; SSE41-NEXT: retq 1331; 1332; AVX-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 1333; AVX: # %bb.0: 1334; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0 1335; AVX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0 1336; AVX-NEXT: retq 1337 %a = 
insertelement <16 x i8> poison, i8 %i, i32 0 1338 %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1339 ret <16 x i8> %shuffle 1340} 1341 1342define <16 x i8> @shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16(i8 %i) { 1343; SSE2-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16: 1344; SSE2: # %bb.0: 1345; SSE2-NEXT: shll $8, %edi 1346; SSE2-NEXT: pxor %xmm0, %xmm0 1347; SSE2-NEXT: pinsrw $7, %edi, %xmm0 1348; SSE2-NEXT: retq 1349; 1350; SSSE3-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16: 1351; SSSE3: # %bb.0: 1352; SSSE3-NEXT: shll $8, %edi 1353; SSSE3-NEXT: pxor %xmm0, %xmm0 1354; SSSE3-NEXT: pinsrw $7, %edi, %xmm0 1355; SSSE3-NEXT: retq 1356; 1357; SSE41-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16: 1358; SSE41: # %bb.0: 1359; SSE41-NEXT: pxor %xmm0, %xmm0 1360; SSE41-NEXT: pinsrb $15, %edi, %xmm0 1361; SSE41-NEXT: retq 1362; 1363; AVX-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16: 1364; AVX: # %bb.0: 1365; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0 1366; AVX-NEXT: vpinsrb $15, %edi, %xmm0, %xmm0 1367; AVX-NEXT: retq 1368 %a = insertelement <16 x i8> poison, i8 %i, i32 0 1369 %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 poison, i32 poison, i32 3, i32 poison, i32 poison, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 16> 1370 ret <16 x i8> %shuffle 1371} 1372 1373define <16 x i8> @shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) { 1374; SSE2-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 1375; SSE2: # %bb.0: 1376; SSE2-NEXT: movzbl %dil, %eax 1377; SSE2-NEXT: pxor %xmm0, %xmm0 1378; SSE2-NEXT: pinsrw $1, %eax, %xmm0 1379; SSE2-NEXT: retq 1380; 1381; SSSE3-LABEL: 
shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 1382; SSSE3: # %bb.0: 1383; SSSE3-NEXT: movzbl %dil, %eax 1384; SSSE3-NEXT: pxor %xmm0, %xmm0 1385; SSSE3-NEXT: pinsrw $1, %eax, %xmm0 1386; SSSE3-NEXT: retq 1387; 1388; SSE41-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 1389; SSE41: # %bb.0: 1390; SSE41-NEXT: pxor %xmm0, %xmm0 1391; SSE41-NEXT: pinsrb $2, %edi, %xmm0 1392; SSE41-NEXT: retq 1393; 1394; AVX-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 1395; AVX: # %bb.0: 1396; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0 1397; AVX-NEXT: vpinsrb $2, %edi, %xmm0, %xmm0 1398; AVX-NEXT: retq 1399 %a = insertelement <16 x i8> poison, i8 %i, i32 3 1400 %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 19, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1401 ret <16 x i8> %shuffle 1402} 1403 1404define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_16_uu_18_uu(<16 x i8> %a) { 1405; SSE-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_16_uu_18_uu: 1406; SSE: # %bb.0: 1407; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3] 1408; SSE-NEXT: retq 1409; 1410; AVX-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_16_uu_18_uu: 1411; AVX: # %bb.0: 1412; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3] 1413; AVX-NEXT: retq 1414 %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 poison, i32 18, i32 poison> 1415 ret <16 x i8> %shuffle 1416} 1417 1418define <16 x i8> @shuffle_v16i8_28_uu_30_31_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(<16 x i8> %a) { 1419; SSE-LABEL: shuffle_v16i8_28_uu_30_31_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 1420; SSE: # %bb.0: 1421; SSE-NEXT: psrldq {{.*#+}} 
xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1422; SSE-NEXT: retq 1423; 1424; AVX-LABEL: shuffle_v16i8_28_uu_30_31_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 1425; AVX: # %bb.0: 1426; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1427; AVX-NEXT: retq 1428 %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 28, i32 poison, i32 30, i32 31, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1429 ret <16 x i8> %shuffle 1430} 1431 1432define <16 x i8> @shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14(<16 x i8> %a, <16 x i8> %b) { 1433; SSE2-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14: 1434; SSE2: # %bb.0: 1435; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1436; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 1437; SSE2-NEXT: por %xmm1, %xmm0 1438; SSE2-NEXT: retq 1439; 1440; SSSE3-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14: 1441; SSSE3: # %bb.0: 1442; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 1443; SSSE3-NEXT: retq 1444; 1445; SSE41-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14: 1446; SSE41: # %bb.0: 1447; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 1448; SSE41-NEXT: retq 1449; 1450; AVX-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14: 1451; AVX: # %bb.0: 1452; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 1453; AVX-NEXT: retq 1454 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14> 1455 ret <16 x i8> %shuffle 1456} 1457 1458define <16 x i8> 
@shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14(<16 x i8> %a, <16 x i8> %b) { 1459; SSE2-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14: 1460; SSE2: # %bb.0: 1461; SSE2-NEXT: movdqa %xmm0, %xmm1 1462; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1463; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 1464; SSE2-NEXT: por %xmm1, %xmm0 1465; SSE2-NEXT: retq 1466; 1467; SSSE3-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14: 1468; SSSE3: # %bb.0: 1469; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 1470; SSSE3-NEXT: retq 1471; 1472; SSE41-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14: 1473; SSE41: # %bb.0: 1474; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 1475; SSE41-NEXT: retq 1476; 1477; AVX-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14: 1478; AVX: # %bb.0: 1479; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 1480; AVX-NEXT: retq 1481 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14> 1482 ret <16 x i8> %shuffle 1483} 1484 1485define <16 x i8> @shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00(<16 x i8> %a, <16 x i8> %b) { 1486; SSE2-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00: 1487; SSE2: # %bb.0: 1488; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero 1489; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0] 1490; SSE2-NEXT: por %xmm1, %xmm0 1491; SSE2-NEXT: retq 1492; 1493; SSSE3-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00: 1494; SSSE3: # %bb.0: 1495; SSSE3-NEXT: palignr 
{{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0] 1496; SSSE3-NEXT: retq 1497; 1498; SSE41-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00: 1499; SSE41: # %bb.0: 1500; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0] 1501; SSE41-NEXT: retq 1502; 1503; AVX-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00: 1504; AVX: # %bb.0: 1505; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0] 1506; AVX-NEXT: retq 1507 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 0> 1508 ret <16 x i8> %shuffle 1509} 1510 1511define <16 x i8> @shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16(<16 x i8> %a, <16 x i8> %b) { 1512; SSE2-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16: 1513; SSE2: # %bb.0: 1514; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero 1515; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0] 1516; SSE2-NEXT: por %xmm1, %xmm0 1517; SSE2-NEXT: retq 1518; 1519; SSSE3-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16: 1520; SSSE3: # %bb.0: 1521; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0] 1522; SSSE3-NEXT: movdqa %xmm1, %xmm0 1523; SSSE3-NEXT: retq 1524; 1525; SSE41-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16: 1526; SSE41: # %bb.0: 1527; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0] 1528; SSE41-NEXT: movdqa %xmm1, %xmm0 1529; SSE41-NEXT: retq 1530; 1531; AVX-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16: 1532; AVX: # %bb.0: 1533; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0] 1534; AVX-NEXT: retq 1535 
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16> 1536 ret <16 x i8> %shuffle 1537} 1538 1539define <16 x i8> @shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00(<16 x i8> %a, <16 x i8> %b) { 1540; SSE2-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00: 1541; SSE2: # %bb.0: 1542; SSE2-NEXT: movdqa %xmm0, %xmm1 1543; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero 1544; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0] 1545; SSE2-NEXT: por %xmm1, %xmm0 1546; SSE2-NEXT: retq 1547; 1548; SSSE3-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00: 1549; SSSE3: # %bb.0: 1550; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0] 1551; SSSE3-NEXT: retq 1552; 1553; SSE41-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00: 1554; SSE41: # %bb.0: 1555; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0] 1556; SSE41-NEXT: retq 1557; 1558; AVX-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00: 1559; AVX: # %bb.0: 1560; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0] 1561; AVX-NEXT: retq 1562 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0> 1563 ret <16 x i8> %shuffle 1564} 1565 1566define <16 x i8> @shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30(<16 x i8> %a, <16 x i8> %b) { 1567; SSE2-LABEL: shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30: 1568; SSE2: # %bb.0: 1569; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1570; SSE2-NEXT: pslldq {{.*#+}} xmm1 = 
zero,xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 1571; SSE2-NEXT: por %xmm1, %xmm0 1572; SSE2-NEXT: retq 1573; 1574; SSSE3-LABEL: shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30: 1575; SSSE3: # %bb.0: 1576; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 1577; SSSE3-NEXT: movdqa %xmm1, %xmm0 1578; SSSE3-NEXT: retq 1579; 1580; SSE41-LABEL: shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30: 1581; SSE41: # %bb.0: 1582; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 1583; SSE41-NEXT: movdqa %xmm1, %xmm0 1584; SSE41-NEXT: retq 1585; 1586; AVX-LABEL: shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30: 1587; AVX: # %bb.0: 1588; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 1589; AVX-NEXT: retq 1590 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30> 1591 ret <16 x i8> %shuffle 1592} 1593 1594; PR31151 1595define <16 x i8> @shuffle_v16i8_00_16_01_17_04_20_05_21_02_18_03_19_06_22_07_23(<16 x i8> %val1, <16 x i8> %val2) { 1596; SSE-LABEL: shuffle_v16i8_00_16_01_17_04_20_05_21_02_18_03_19_06_22_07_23: 1597; SSE: # %bb.0: 1598; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1599; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] 1600; SSE-NEXT: retq 1601; 1602; AVX-LABEL: shuffle_v16i8_00_16_01_17_04_20_05_21_02_18_03_19_06_22_07_23: 1603; AVX: # %bb.0: 1604; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1605; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] 1606; AVX-NEXT: retq 1607 %shuffle = shufflevector <16 x i8> %val1, <16 x i8> %val2, <16 x i32> <i32 0, i32 
16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23> 1608 ret <16 x i8> %shuffle 1609} 1610 1611define <16 x i8> @shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %a) { 1612; SSE2-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu: 1613; SSE2: # %bb.0: 1614; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1615; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1616; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3] 1617; SSE2-NEXT: retq 1618; 1619; SSSE3-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu: 1620; SSSE3: # %bb.0: 1621; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,u,u,u,u,u,u,u,1,u,u,u,u,u,u,u] 1622; SSSE3-NEXT: retq 1623; 1624; SSE41-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu: 1625; SSE41: # %bb.0: 1626; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 1627; SSE41-NEXT: retq 1628; 1629; AVX-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu: 1630; AVX: # %bb.0: 1631; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 1632; AVX-NEXT: retq 1633 %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 1634 ret <16 x i8> %shuffle 1635} 1636 1637define <16 x i8> @shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz(<16 x i8> %a) { 1638; SSE2-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz: 1639; SSE2: # %bb.0: 1640; SSE2-NEXT: pxor %xmm1, %xmm1 1641; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = 
xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1642; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1643; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1644; SSE2-NEXT: retq 1645; 1646; SSSE3-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz: 1647; SSSE3: # %bb.0: 1648; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 1649; SSSE3-NEXT: retq 1650; 1651; SSE41-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz: 1652; SSE41: # %bb.0: 1653; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 1654; SSE41-NEXT: retq 1655; 1656; AVX-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz: 1657; AVX: # %bb.0: 1658; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 1659; AVX-NEXT: retq 1660 %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 1, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 1661 ret <16 x i8> %shuffle 1662} 1663 1664define <16 x i8> @shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu(<16 x i8> %a) { 1665; SSE2-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu: 1666; SSE2: # %bb.0: 1667; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1668; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1669; SSE2-NEXT: retq 1670; 1671; SSSE3-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu: 1672; SSSE3: # %bb.0: 1673; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1674; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1675; 
SSSE3-NEXT: retq 1676; 1677; SSE41-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu: 1678; SSE41: # %bb.0: 1679; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1680; SSE41-NEXT: retq 1681; 1682; AVX-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu: 1683; AVX: # %bb.0: 1684; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1685; AVX-NEXT: retq 1686 %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 2, i32 poison, i32 poison, i32 poison, i32 3, i32 poison, i32 poison, i32 poison> 1687 ret <16 x i8> %shuffle 1688} 1689 1690define <16 x i8> @shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz(<16 x i8> %a) { 1691; SSE2-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz: 1692; SSE2: # %bb.0: 1693; SSE2-NEXT: pxor %xmm1, %xmm1 1694; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1695; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1696; SSE2-NEXT: retq 1697; 1698; SSSE3-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz: 1699; SSSE3: # %bb.0: 1700; SSSE3-NEXT: pxor %xmm1, %xmm1 1701; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1702; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1703; SSSE3-NEXT: retq 1704; 1705; SSE41-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz: 1706; SSE41: # %bb.0: 1707; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1708; SSE41-NEXT: retq 1709; 1710; AVX-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz: 1711; AVX: # %bb.0: 1712; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1713; AVX-NEXT: retq 1714 %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 18, i32 19, i32 1, i32 21, i32 22, i32 23, i32 2, i32 25, i32 26, i32 27, i32 3, i32 29, i32 30, i32 31> 1715 ret <16 x i8> %shuffle 1716} 1717 1718define <16 x i8> @shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu(<16 x i8> %a) { 1719; SSE2-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu: 1720; SSE2: # %bb.0: 1721; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1722; SSE2-NEXT: retq 1723; 1724; SSSE3-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu: 1725; SSSE3: # %bb.0: 1726; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1727; SSSE3-NEXT: retq 1728; 1729; SSE41-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu: 1730; SSE41: # %bb.0: 1731; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1732; SSE41-NEXT: retq 1733; 1734; AVX-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu: 1735; AVX: # %bb.0: 1736; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1737; AVX-NEXT: retq 1738 %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 poison, i32 1, i32 poison, i32 2, i32 poison, i32 3, i32 poison, i32 4, i32 poison, i32 5, i32 poison, i32 6, i32 poison, i32 7, i32 poison> 1739 ret <16 x i8> %shuffle 1740} 1741 1742define <16 x i8> 
@shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz(<16 x i8> %a) { 1743; SSE2-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz: 1744; SSE2: # %bb.0: 1745; SSE2-NEXT: pxor %xmm1, %xmm1 1746; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1747; SSE2-NEXT: retq 1748; 1749; SSSE3-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz: 1750; SSSE3: # %bb.0: 1751; SSSE3-NEXT: pxor %xmm1, %xmm1 1752; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1753; SSSE3-NEXT: retq 1754; 1755; SSE41-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz: 1756; SSE41: # %bb.0: 1757; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1758; SSE41-NEXT: retq 1759; 1760; AVX-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz: 1761; AVX: # %bb.0: 1762; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1763; AVX-NEXT: retq 1764 %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 1, i32 19, i32 2, i32 21, i32 3, i32 23, i32 4, i32 25, i32 5, i32 27, i32 6, i32 29, i32 7, i32 31> 1765 ret <16 x i8> %shuffle 1766} 1767 1768define <16 x i8> @shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00(<16 x i8> %a, <16 x i8> %b) { 1769; SSE2-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00: 1770; SSE2: # %bb.0: # %entry 1771; SSE2-NEXT: pxor %xmm2, %xmm2 1772; SSE2-NEXT: movdqa %xmm0, %xmm3 1773; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = 
xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 1774; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm3[1,3,2,0,4,5,6,7] 1775; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,0,2,1] 1776; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [65535,65535,65535,0,65535,0,0,65535] 1777; SSE2-NEXT: pand %xmm5, %xmm4 1778; SSE2-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15] 1779; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,3,0,1] 1780; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[2,2,2,2,4,5,6,7] 1781; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,7,7] 1782; SSE2-NEXT: pandn %xmm2, %xmm5 1783; SSE2-NEXT: por %xmm4, %xmm5 1784; SSE2-NEXT: psrlq $16, %xmm0 1785; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3] 1786; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[3,1,1,3] 1787; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7] 1788; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,4] 1789; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3] 1790; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] 1791; SSE2-NEXT: packuswb %xmm5, %xmm2 1792; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [255,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255] 1793; SSE2-NEXT: pand %xmm0, %xmm2 1794; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[3,1,1,3,4,5,6,7] 1795; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] 1796; SSE2-NEXT: pandn %xmm1, %xmm0 1797; SSE2-NEXT: por %xmm2, %xmm0 1798; SSE2-NEXT: retq 1799; 1800; SSSE3-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00: 1801; SSSE3: # %bb.0: # %entry 1802; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[2],zero,zero,zero 1803; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,10,2,7],zero,xmm0[14,7,2],zero,xmm0[3,1,14],zero,xmm0[9,11,0] 1804; SSSE3-NEXT: por %xmm1, %xmm0 1805; SSSE3-NEXT: retq 1806; 
1807; SSE41-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00: 1808; SSE41: # %bb.0: # %entry 1809; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[2],zero,zero,zero 1810; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,10,2,7],zero,xmm0[14,7,2],zero,xmm0[3,1,14],zero,xmm0[9,11,0] 1811; SSE41-NEXT: por %xmm1, %xmm0 1812; SSE41-NEXT: retq 1813; 1814; AVX1-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00: 1815; AVX1: # %bb.0: # %entry 1816; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[2],zero,zero,zero 1817; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,10,2,7],zero,xmm0[14,7,2],zero,xmm0[3,1,14],zero,xmm0[9,11,0] 1818; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 1819; AVX1-NEXT: retq 1820; 1821; AVX2-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00: 1822; AVX2: # %bb.0: # %entry 1823; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[2],zero,zero,zero 1824; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,10,2,7],zero,xmm0[14,7,2],zero,xmm0[3,1,14],zero,xmm0[9,11,0] 1825; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 1826; AVX2-NEXT: retq 1827; 1828; AVX512VLBW-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00: 1829; AVX512VLBW: # %bb.0: # %entry 1830; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[2],zero,zero,zero 1831; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,10,2,7],zero,xmm0[14,7,2],zero,xmm0[3,1,14],zero,xmm0[9,11,0] 1832; AVX512VLBW-NEXT: vpor %xmm1, %xmm0, %xmm0 1833; AVX512VLBW-NEXT: retq 1834; 1835; AVX512VLVBMI-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00: 1836; AVX512VLVBMI: # %bb.0: # %entry 1837; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} xmm2 = [u,10,2,7,22,14,7,2,18,3,1,14,18,9,11,0] 1838; AVX512VLVBMI-NEXT: vpermt2b %xmm1, %xmm2, %xmm0 
1839; AVX512VLVBMI-NEXT: retq 1840; 1841; XOP-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00: 1842; XOP: # %bb.0: # %entry 1843; XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm0[u,10,2,7],xmm1[6],xmm0[14,7,2],xmm1[2],xmm0[3,1,14],xmm1[2],xmm0[9,11,0] 1844; XOP-NEXT: retq 1845entry: 1846 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 poison, i32 10, i32 2, i32 7, i32 22, i32 14, i32 7, i32 2, i32 18, i32 3, i32 1, i32 14, i32 18, i32 9, i32 11, i32 0> 1847 1848 ret <16 x i8> %shuffle 1849} 1850 1851define <16 x i8> @shuffe_v16i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30(<8 x i16> %a0, <8 x i16> %a1) { 1852; SSE-LABEL: shuffe_v16i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30: 1853; SSE: # %bb.0: 1854; SSE-NEXT: psrlw $8, %xmm0 1855; SSE-NEXT: psrlw $8, %xmm1 1856; SSE-NEXT: packuswb %xmm1, %xmm0 1857; SSE-NEXT: retq 1858; 1859; AVX1-LABEL: shuffe_v16i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30: 1860; AVX1: # %bb.0: 1861; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 1862; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1 1863; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 1864; AVX1-NEXT: retq 1865; 1866; AVX2OR512VL-LABEL: shuffe_v16i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30: 1867; AVX2OR512VL: # %bb.0: 1868; AVX2OR512VL-NEXT: vpsrlw $8, %xmm0, %xmm0 1869; AVX2OR512VL-NEXT: vpsrlw $8, %xmm1, %xmm1 1870; AVX2OR512VL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 1871; AVX2OR512VL-NEXT: retq 1872; 1873; XOP-LABEL: shuffe_v16i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30: 1874; XOP: # %bb.0: 1875; XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm0[1,3,5,7,9,11,13,15],xmm1[1,3,5,7,9,11,13,15] 1876; XOP-NEXT: retq 1877 %1 = lshr <8 x i16> %a0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> 1878 %2 = lshr <8 x i16> %a1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> 1879 %3 = bitcast <8 x i16> %1 to <16 x i8> 1880 %4 = bitcast <8 x i16> %2 to <16 x i8> 1881 %5 = shufflevector <16 x i8> %3, <16 x i8> %4, <16 x i32> 
<i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> 1882 ret <16 x i8> %5 1883} 1884 1885define <16 x i8> @stress_test2(<16 x i8> %s.0.0, <16 x i8> %s.0.1, <16 x i8> %s.0.2) { 1886; Nothing interesting to test here: the results of two shuffles feed a third. Just make sure we don't crash. 1887; ALL-LABEL: stress_test2: 1888; ALL: retq 1889entry: 1890 %s.1.0 = shufflevector <16 x i8> %s.0.0, <16 x i8> %s.0.1, <16 x i32> <i32 29, i32 30, i32 2, i32 16, i32 26, i32 21, i32 11, i32 26, i32 26, i32 3, i32 4, i32 5, i32 30, i32 28, i32 15, i32 5> 1891 %s.1.1 = shufflevector <16 x i8> %s.0.1, <16 x i8> %s.0.2, <16 x i32> <i32 31, i32 1, i32 24, i32 12, i32 28, i32 5, i32 2, i32 9, i32 29, i32 1, i32 31, i32 5, i32 6, i32 17, i32 15, i32 22> 1892 %s.2.0 = shufflevector <16 x i8> %s.1.0, <16 x i8> %s.1.1, <16 x i32> <i32 22, i32 1, i32 12, i32 3, i32 30, i32 4, i32 30, i32 poison, i32 1, i32 10, i32 14, i32 18, i32 27, i32 13, i32 16, i32 19> 1893 1894 ret <16 x i8> %s.2.0 1895} 1896 1897define void @constant_gets_selected(ptr %ptr1, ptr %ptr2) { 1898; SSE-LABEL: constant_gets_selected: 1899; SSE: # %bb.0: # %entry 1900; SSE-NEXT: xorps %xmm0, %xmm0 1901; SSE-NEXT: movaps %xmm0, (%rdi) 1902; SSE-NEXT: movaps %xmm0, (%rsi) 1903; SSE-NEXT: retq 1904; 1905; AVX-LABEL: constant_gets_selected: 1906; AVX: # %bb.0: # %entry 1907; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 1908; AVX-NEXT: vmovaps %xmm0, (%rdi) 1909; AVX-NEXT: vmovaps %xmm0, (%rsi) 1910; AVX-NEXT: retq 1911entry: 1912 %weird_zero = bitcast <4 x i32> zeroinitializer to <16 x i8> 1913 %shuffle.i = shufflevector <16 x i8> <i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 0, i8 0, i8 0, i8 0>, <16 x i8> %weird_zero, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27> 1914 %weirder_zero = bitcast <16 x i8> %shuffle.i to <4 x
i32> 1915 store <4 x i32> %weirder_zero, ptr %ptr1, align 16 1916 store <4 x i32> zeroinitializer, ptr %ptr2, align 16 1917 ret void 1918} 1919 1920; 1921; Shuffle to logical bit shifts 1922; 1923 1924define <16 x i8> @shuffle_v16i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14(<16 x i8> %a, <16 x i8> %b) { 1925; SSE-LABEL: shuffle_v16i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14: 1926; SSE: # %bb.0: 1927; SSE-NEXT: psllw $8, %xmm0 1928; SSE-NEXT: retq 1929; 1930; AVX-LABEL: shuffle_v16i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14: 1931; AVX: # %bb.0: 1932; AVX-NEXT: vpsllw $8, %xmm0, %xmm0 1933; AVX-NEXT: retq 1934 %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 0, i32 16, i32 2, i32 16, i32 4, i32 16, i32 6, i32 16, i32 8, i32 16, i32 10, i32 16, i32 12, i32 16, i32 14> 1935 ret <16 x i8> %shuffle 1936} 1937 1938define <16 x i8> @shuffle_v16i8_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12(<16 x i8> %a, <16 x i8> %b) { 1939; SSE-LABEL: shuffle_v16i8_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12: 1940; SSE: # %bb.0: 1941; SSE-NEXT: pslld $24, %xmm0 1942; SSE-NEXT: retq 1943; 1944; AVX-LABEL: shuffle_v16i8_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12: 1945; AVX: # %bb.0: 1946; AVX-NEXT: vpslld $24, %xmm0, %xmm0 1947; AVX-NEXT: retq 1948 %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 4, i32 16, i32 16, i32 16, i32 8, i32 16, i32 16, i32 16, i32 12> 1949 ret <16 x i8> %shuffle 1950} 1951 1952define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_00_zz_zz_zz_zz_zz_zz_zz_08(<16 x i8> %a, <16 x i8> %b) { 1953; SSE-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_00_zz_zz_zz_zz_zz_zz_zz_08: 1954; SSE: # %bb.0: 1955; SSE-NEXT: psllq $56, %xmm0 1956; SSE-NEXT: retq 1957; 1958; AVX-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_00_zz_zz_zz_zz_zz_zz_zz_08: 1959; AVX: # %bb.0: 1960; AVX-NEXT: vpsllq $56, %xmm0, %xmm0 1961; AVX-NEXT: retq 1962 
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 8> 1963 ret <16 x i8> %shuffle 1964} 1965 1966define <16 x i8> @shuffle_v16i8_zz_00_uu_02_03_uu_05_06_zz_08_09_uu_11_12_13_14(<16 x i8> %a, <16 x i8> %b) { 1967; SSE-LABEL: shuffle_v16i8_zz_00_uu_02_03_uu_05_06_zz_08_09_uu_11_12_13_14: 1968; SSE: # %bb.0: 1969; SSE-NEXT: psllq $8, %xmm0 1970; SSE-NEXT: retq 1971; 1972; AVX-LABEL: shuffle_v16i8_zz_00_uu_02_03_uu_05_06_zz_08_09_uu_11_12_13_14: 1973; AVX: # %bb.0: 1974; AVX-NEXT: vpsllq $8, %xmm0, %xmm0 1975; AVX-NEXT: retq 1976 %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 0, i32 poison, i32 2, i32 3, i32 poison, i32 5, i32 6, i32 16, i32 8, i32 9, i32 poison, i32 11, i32 12, i32 13, i32 14> 1977 ret <16 x i8> %shuffle 1978} 1979 1980define <16 x i8> @shuffle_v16i8_01_uu_uu_uu_uu_zz_uu_zz_uu_zz_11_zz_13_zz_15_zz(<16 x i8> %a, <16 x i8> %b) { 1981; SSE-LABEL: shuffle_v16i8_01_uu_uu_uu_uu_zz_uu_zz_uu_zz_11_zz_13_zz_15_zz: 1982; SSE: # %bb.0: 1983; SSE-NEXT: psrlw $8, %xmm0 1984; SSE-NEXT: retq 1985; 1986; AVX-LABEL: shuffle_v16i8_01_uu_uu_uu_uu_zz_uu_zz_uu_zz_11_zz_13_zz_15_zz: 1987; AVX: # %bb.0: 1988; AVX-NEXT: vpsrlw $8, %xmm0, %xmm0 1989; AVX-NEXT: retq 1990 %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 16, i32 poison, i32 16, i32 poison, i32 16, i32 11, i32 16, i32 13, i32 16, i32 15, i32 16> 1991 ret <16 x i8> %shuffle 1992} 1993 1994define <16 x i8> @shuffle_v16i8_02_03_zz_zz_06_07_uu_uu_uu_uu_uu_uu_14_15_zz_zz(<16 x i8> %a, <16 x i8> %b) { 1995; SSE-LABEL: shuffle_v16i8_02_03_zz_zz_06_07_uu_uu_uu_uu_uu_uu_14_15_zz_zz: 1996; SSE: # %bb.0: 1997; SSE-NEXT: psrld $16, %xmm0 1998; SSE-NEXT: retq 1999; 2000; AVX-LABEL: shuffle_v16i8_02_03_zz_zz_06_07_uu_uu_uu_uu_uu_uu_14_15_zz_zz: 2001; 
AVX: # %bb.0: 2002; AVX-NEXT: vpsrld $16, %xmm0, %xmm0 2003; AVX-NEXT: retq 2004 %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 2, i32 3, i32 16, i32 16, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 14, i32 15, i32 16, i32 16> 2005 ret <16 x i8> %shuffle 2006} 2007 2008define <16 x i8> @shuffle_v16i8_07_zz_zz_zz_zz_zz_uu_uu_15_uu_uu_uu_uu_uu_zz_zz(<16 x i8> %a, <16 x i8> %b) { 2009; SSE-LABEL: shuffle_v16i8_07_zz_zz_zz_zz_zz_uu_uu_15_uu_uu_uu_uu_uu_zz_zz: 2010; SSE: # %bb.0: 2011; SSE-NEXT: psrlq $56, %xmm0 2012; SSE-NEXT: retq 2013; 2014; AVX-LABEL: shuffle_v16i8_07_zz_zz_zz_zz_zz_uu_uu_15_uu_uu_uu_uu_uu_zz_zz: 2015; AVX: # %bb.0: 2016; AVX-NEXT: vpsrlq $56, %xmm0, %xmm0 2017; AVX-NEXT: retq 2018 %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 7, i32 16, i32 16, i32 16, i32 16, i32 16, i32 poison, i32 poison, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 16, i32 16> 2019 ret <16 x i8> %shuffle 2020} 2021 2022define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_01_02_03_04_05_06_zz_zz_zz_zz_zz_zz(<16 x i8> %a) { 2023; SSE2-LABEL: shuffle_v16i8_zz_zz_zz_zz_01_02_03_04_05_06_zz_zz_zz_zz_zz_zz: 2024; SSE2: # %bb.0: 2025; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6] 2026; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 2027; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] 2028; SSE2-NEXT: retq 2029; 2030; SSSE3-LABEL: shuffle_v16i8_zz_zz_zz_zz_01_02_03_04_05_06_zz_zz_zz_zz_zz_zz: 2031; SSSE3: # %bb.0: 2032; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[1,2,3,4,5,6],zero,zero,zero,zero,zero,zero 2033; SSSE3-NEXT: retq 2034; 2035; SSE41-LABEL: shuffle_v16i8_zz_zz_zz_zz_01_02_03_04_05_06_zz_zz_zz_zz_zz_zz: 2036; SSE41: # %bb.0: 2037; SSE41-NEXT: pshufb {{.*#+}} xmm0 = 
zero,zero,zero,zero,xmm0[1,2,3,4,5,6],zero,zero,zero,zero,zero,zero 2038; SSE41-NEXT: retq 2039; 2040; AVX-LABEL: shuffle_v16i8_zz_zz_zz_zz_01_02_03_04_05_06_zz_zz_zz_zz_zz_zz: 2041; AVX: # %bb.0: 2042; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[1,2,3,4,5,6],zero,zero,zero,zero,zero,zero 2043; AVX-NEXT: retq 2044 %shuffle = shufflevector <16 x i8> %a, <16 x i8> <i8 0, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16> 2045 ret <16 x i8> %shuffle 2046} 2047 2048define <16 x i8> @shuffle_v16i8_01_02_03_04_05_06_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(<16 x i8> %a) { 2049; SSE-LABEL: shuffle_v16i8_01_02_03_04_05_06_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 2050; SSE: # %bb.0: 2051; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6] 2052; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 2053; SSE-NEXT: retq 2054; 2055; AVX1-LABEL: shuffle_v16i8_01_02_03_04_05_06_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 2056; AVX1: # %bb.0: 2057; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6] 2058; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 2059; AVX1-NEXT: retq 2060; 2061; AVX2-SLOW-LABEL: shuffle_v16i8_01_02_03_04_05_06_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 2062; AVX2-SLOW: # %bb.0: 2063; AVX2-SLOW-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6] 2064; AVX2-SLOW-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 2065; AVX2-SLOW-NEXT: retq 2066; 2067; AVX2-FAST-LABEL: shuffle_v16i8_01_02_03_04_05_06_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 2068; AVX2-FAST: 
# %bb.0: 2069; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 2070; AVX2-FAST-NEXT: retq 2071; 2072; AVX512VL-LABEL: shuffle_v16i8_01_02_03_04_05_06_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 2073; AVX512VL: # %bb.0: 2074; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 2075; AVX512VL-NEXT: retq 2076; 2077; XOP-LABEL: shuffle_v16i8_01_02_03_04_05_06_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 2078; XOP: # %bb.0: 2079; XOP-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6] 2080; XOP-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 2081; XOP-NEXT: retq 2082 %shuffle = shufflevector <16 x i8> %a, <16 x i8> <i8 0, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16> 2083 ret <16 x i8> %shuffle 2084} 2085 2086define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_01_02_03_04_05_06(<16 x i8> %a) { 2087; SSE-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_01_02_03_04_05_06: 2088; SSE: # %bb.0: 2089; SSE-NEXT: psrlq $8, %xmm0 2090; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5] 2091; SSE-NEXT: retq 2092; 2093; AVX1-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_01_02_03_04_05_06: 2094; AVX1: # %bb.0: 2095; AVX1-NEXT: vpsrlq $8, %xmm0, %xmm0 2096; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5] 2097; AVX1-NEXT: retq 2098; 2099; AVX2-SLOW-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_01_02_03_04_05_06: 2100; AVX2-SLOW: # %bb.0: 2101; AVX2-SLOW-NEXT: vpsrlq $8, %xmm0, %xmm0 2102; AVX2-SLOW-NEXT: vpslldq {{.*#+}} xmm0 
= zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5] 2103; AVX2-SLOW-NEXT: retq 2104; 2105; AVX2-FAST-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_01_02_03_04_05_06: 2106; AVX2-FAST: # %bb.0: 2107; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[1,2,3,4,5,6] 2108; AVX2-FAST-NEXT: retq 2109; 2110; AVX512VL-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_01_02_03_04_05_06: 2111; AVX512VL: # %bb.0: 2112; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[1,2,3,4,5,6] 2113; AVX512VL-NEXT: retq 2114; 2115; XOP-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_01_02_03_04_05_06: 2116; XOP: # %bb.0: 2117; XOP-NEXT: vpsrlq $8, %xmm0, %xmm0 2118; XOP-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5] 2119; XOP-NEXT: retq 2120 %shuffle = shufflevector <16 x i8> %a, <16 x i8> <i8 0, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6> 2121 ret <16 x i8> %shuffle 2122} 2123 2124define <16 x i8> @shuffle_v16i8_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14(<16 x i8> %a) { 2125; SSE2-LABEL: shuffle_v16i8_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14: 2126; SSE2: # %bb.0: 2127; SSE2-NEXT: movdqa %xmm0, %xmm1 2128; SSE2-NEXT: psrld $24, %xmm1 2129; SSE2-NEXT: pslld $8, %xmm0 2130; SSE2-NEXT: por %xmm1, %xmm0 2131; SSE2-NEXT: retq 2132; 2133; SSSE3-LABEL: shuffle_v16i8_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14: 2134; SSSE3: # %bb.0: 2135; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14] 2136; SSSE3-NEXT: retq 2137; 2138; SSE41-LABEL: shuffle_v16i8_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14: 2139; SSE41: # %bb.0: 2140; SSE41-NEXT: 
pshufb {{.*#+}} xmm0 = xmm0[3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14] 2141; SSE41-NEXT: retq 2142; 2143; AVX1-LABEL: shuffle_v16i8_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14: 2144; AVX1: # %bb.0: 2145; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14] 2146; AVX1-NEXT: retq 2147; 2148; AVX2-LABEL: shuffle_v16i8_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14: 2149; AVX2: # %bb.0: 2150; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14] 2151; AVX2-NEXT: retq 2152; 2153; AVX512VL-LABEL: shuffle_v16i8_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14: 2154; AVX512VL: # %bb.0: 2155; AVX512VL-NEXT: vprold $8, %xmm0, %xmm0 2156; AVX512VL-NEXT: retq 2157; 2158; XOP-LABEL: shuffle_v16i8_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14: 2159; XOP: # %bb.0: 2160; XOP-NEXT: vprotd $8, %xmm0, %xmm0 2161; XOP-NEXT: retq 2162 %shuffle = shufflevector <16 x i8> %a, <16 x i8> poison, <16 x i32> <i32 3, i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6, i32 11, i32 8, i32 9, i32 10, i32 15, i32 12, i32 13, i32 14> 2163 ret <16 x i8> %shuffle 2164} 2165 2166; PR44379 2167define <16 x i8> @shuffle_v16i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09(<16 x i8> %a) { 2168; SSE-LABEL: shuffle_v16i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09: 2169; SSE: # %bb.0: 2170; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7] 2171; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,7,4] 2172; SSE-NEXT: retq 2173; 2174; AVX1-LABEL: shuffle_v16i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09: 2175; AVX1: # %bb.0: 2176; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7] 2177; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,7,4] 2178; AVX1-NEXT: retq 2179; 2180; AVX2-SLOW-LABEL: shuffle_v16i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09: 2181; AVX2-SLOW: # %bb.0: 2182; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7] 2183; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,7,4] 2184; AVX2-SLOW-NEXT: 
retq 2185; 2186; AVX2-FAST-LABEL: shuffle_v16i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09: 2187; AVX2-FAST: # %bb.0: 2188; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,0,1,10,11,12,13,14,15,8,9] 2189; AVX2-FAST-NEXT: retq 2190; 2191; AVX512VL-LABEL: shuffle_v16i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09: 2192; AVX512VL: # %bb.0: 2193; AVX512VL-NEXT: vprolq $48, %xmm0, %xmm0 2194; AVX512VL-NEXT: retq 2195; 2196; XOP-LABEL: shuffle_v16i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09: 2197; XOP: # %bb.0: 2198; XOP-NEXT: vprotq $48, %xmm0, %xmm0 2199; XOP-NEXT: retq 2200 %shuffle = shufflevector <16 x i8> %a, <16 x i8> poison, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9> 2201 ret <16 x i8> %shuffle 2202} 2203 2204define <16 x i8> @PR12412(<16 x i8> %inval1, <16 x i8> %inval2) { 2205; SSE2-LABEL: PR12412: 2206; SSE2: # %bb.0: # %entry 2207; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] 2208; SSE2-NEXT: pand %xmm2, %xmm1 2209; SSE2-NEXT: pand %xmm2, %xmm0 2210; SSE2-NEXT: packuswb %xmm1, %xmm0 2211; SSE2-NEXT: retq 2212; 2213; SSSE3-LABEL: PR12412: 2214; SSSE3: # %bb.0: # %entry 2215; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] 2216; SSSE3-NEXT: pand %xmm2, %xmm1 2217; SSSE3-NEXT: pand %xmm2, %xmm0 2218; SSSE3-NEXT: packuswb %xmm1, %xmm0 2219; SSSE3-NEXT: retq 2220; 2221; SSE41-LABEL: PR12412: 2222; SSE41: # %bb.0: # %entry 2223; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] 2224; SSE41-NEXT: pand %xmm2, %xmm1 2225; SSE41-NEXT: pand %xmm2, %xmm0 2226; SSE41-NEXT: packuswb %xmm1, %xmm0 2227; SSE41-NEXT: retq 2228; 2229; AVX1-LABEL: PR12412: 2230; AVX1: # %bb.0: # %entry 2231; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] 2232; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1 2233; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 2234; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 2235; 
AVX1-NEXT: retq 2236; 2237; AVX2-LABEL: PR12412: 2238; AVX2: # %bb.0: # %entry 2239; AVX2-NEXT: vpbroadcastw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] 2240; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 2241; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0 2242; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 2243; AVX2-NEXT: retq 2244; 2245; AVX512VL-LABEL: PR12412: 2246; AVX512VL: # %bb.0: # %entry 2247; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 2248; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 2249; AVX512VL-NEXT: vpmovwb %ymm0, %xmm0 2250; AVX512VL-NEXT: vzeroupper 2251; AVX512VL-NEXT: retq 2252; 2253; XOP-LABEL: PR12412: 2254; XOP: # %bb.0: # %entry 2255; XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],xmm1[0,2,4,6,8,10,12,14] 2256; XOP-NEXT: retq 2257entry: 2258 %0 = shufflevector <16 x i8> %inval1, <16 x i8> %inval2, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> 2259 ret <16 x i8> %0 2260} 2261 2262define <16 x i8> @shuffle_v16i8_uu_02_03_zz_uu_06_07_zz_uu_10_11_zz_uu_14_15_zz(<16 x i8> %a) { 2263; SSE-LABEL: shuffle_v16i8_uu_02_03_zz_uu_06_07_zz_uu_10_11_zz_uu_14_15_zz: 2264; SSE: # %bb.0: 2265; SSE-NEXT: psrld $8, %xmm0 2266; SSE-NEXT: retq 2267; 2268; AVX-LABEL: shuffle_v16i8_uu_02_03_zz_uu_06_07_zz_uu_10_11_zz_uu_14_15_zz: 2269; AVX: # %bb.0: 2270; AVX-NEXT: vpsrld $8, %xmm0, %xmm0 2271; AVX-NEXT: retq 2272 %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 poison, i32 2, i32 3, i32 16, i32 poison, i32 6, i32 7, i32 16, i32 poison, i32 10, i32 11, i32 16, i32 poison, i32 14, i32 15, i32 16> 2273 ret <16 x i8> %shuffle 2274} 2275 2276define <16 x i8> @shuffle_v16i8_bitcast_unpack(<16 x i8> %a, <16 x i8> %b) { 2277; SSE-LABEL: shuffle_v16i8_bitcast_unpack: 2278; SSE: # %bb.0: 2279; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = 
xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2280; SSE-NEXT: retq 2281; 2282; AVX-LABEL: shuffle_v16i8_bitcast_unpack: 2283; AVX: # %bb.0: 2284; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2285; AVX-NEXT: retq 2286 %shuffle8 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 7, i32 23, i32 6, i32 22, i32 5, i32 21, i32 4, i32 20, i32 3, i32 19, i32 2, i32 18, i32 1, i32 17, i32 0, i32 16> 2287 %bitcast32 = bitcast <16 x i8> %shuffle8 to <4 x float> 2288 %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 2289 %bitcast16 = bitcast <4 x float> %shuffle32 to <8 x i16> 2290 %shuffle16 = shufflevector <8 x i16> %bitcast16, <8 x i16> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> 2291 %bitcast8 = bitcast <8 x i16> %shuffle16 to <16 x i8> 2292 ret <16 x i8> %bitcast8 2293} 2294 2295define <16 x i8> @insert_dup_mem_v16i8_i32(ptr %ptr) { 2296; SSE2-LABEL: insert_dup_mem_v16i8_i32: 2297; SSE2: # %bb.0: 2298; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2299; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2300; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 2301; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 2302; SSE2-NEXT: retq 2303; 2304; SSSE3-LABEL: insert_dup_mem_v16i8_i32: 2305; SSSE3: # %bb.0: 2306; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2307; SSSE3-NEXT: pxor %xmm1, %xmm1 2308; SSSE3-NEXT: pshufb %xmm1, %xmm0 2309; SSSE3-NEXT: retq 2310; 2311; SSE41-LABEL: insert_dup_mem_v16i8_i32: 2312; SSE41: # %bb.0: 2313; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2314; SSE41-NEXT: pxor %xmm1, %xmm1 2315; SSE41-NEXT: pshufb %xmm1, %xmm0 2316; SSE41-NEXT: retq 2317; 2318; AVX1-LABEL: insert_dup_mem_v16i8_i32: 
2319; AVX1: # %bb.0: 2320; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2321; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 2322; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 2323; AVX1-NEXT: retq 2324; 2325; AVX2OR512VL-LABEL: insert_dup_mem_v16i8_i32: 2326; AVX2OR512VL: # %bb.0: 2327; AVX2OR512VL-NEXT: vpbroadcastb (%rdi), %xmm0 2328; AVX2OR512VL-NEXT: retq 2329; 2330; XOPAVX1-LABEL: insert_dup_mem_v16i8_i32: 2331; XOPAVX1: # %bb.0: 2332; XOPAVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2333; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 2334; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 2335; XOPAVX1-NEXT: retq 2336; 2337; XOPAVX2-LABEL: insert_dup_mem_v16i8_i32: 2338; XOPAVX2: # %bb.0: 2339; XOPAVX2-NEXT: vpbroadcastb (%rdi), %xmm0 2340; XOPAVX2-NEXT: retq 2341 %tmp = load i32, ptr %ptr, align 4 2342 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0 2343 %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8> 2344 %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> poison, <16 x i32> zeroinitializer 2345 ret <16 x i8> %tmp3 2346} 2347 2348define <16 x i8> @insert_dup_mem_v16i8_sext_i8(ptr %ptr) { 2349; SSE2-LABEL: insert_dup_mem_v16i8_sext_i8: 2350; SSE2: # %bb.0: 2351; SSE2-NEXT: movzbl (%rdi), %eax 2352; SSE2-NEXT: movd %eax, %xmm0 2353; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2354; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 2355; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 2356; SSE2-NEXT: retq 2357; 2358; SSSE3-LABEL: insert_dup_mem_v16i8_sext_i8: 2359; SSSE3: # %bb.0: 2360; SSSE3-NEXT: movzbl (%rdi), %eax 2361; SSSE3-NEXT: movd %eax, %xmm0 2362; SSSE3-NEXT: pxor %xmm1, %xmm1 2363; SSSE3-NEXT: pshufb %xmm1, %xmm0 2364; SSSE3-NEXT: retq 2365; 2366; SSE41-LABEL: insert_dup_mem_v16i8_sext_i8: 2367; SSE41: # %bb.0: 2368; SSE41-NEXT: movzbl (%rdi), %eax 2369; SSE41-NEXT: movd %eax, %xmm0 2370; SSE41-NEXT: pxor %xmm1, %xmm1 2371; SSE41-NEXT: pshufb %xmm1, %xmm0 2372; SSE41-NEXT: retq 2373; 2374; AVX1-LABEL: 
insert_dup_mem_v16i8_sext_i8: 2375; AVX1: # %bb.0: 2376; AVX1-NEXT: movzbl (%rdi), %eax 2377; AVX1-NEXT: vmovd %eax, %xmm0 2378; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 2379; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 2380; AVX1-NEXT: retq 2381; 2382; AVX2OR512VL-LABEL: insert_dup_mem_v16i8_sext_i8: 2383; AVX2OR512VL: # %bb.0: 2384; AVX2OR512VL-NEXT: vpbroadcastb (%rdi), %xmm0 2385; AVX2OR512VL-NEXT: retq 2386; 2387; XOPAVX1-LABEL: insert_dup_mem_v16i8_sext_i8: 2388; XOPAVX1: # %bb.0: 2389; XOPAVX1-NEXT: movzbl (%rdi), %eax 2390; XOPAVX1-NEXT: vmovd %eax, %xmm0 2391; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 2392; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 2393; XOPAVX1-NEXT: retq 2394; 2395; XOPAVX2-LABEL: insert_dup_mem_v16i8_sext_i8: 2396; XOPAVX2: # %bb.0: 2397; XOPAVX2-NEXT: vpbroadcastb (%rdi), %xmm0 2398; XOPAVX2-NEXT: retq 2399 %tmp = load i8, ptr %ptr, align 1 2400 %tmp1 = sext i8 %tmp to i32 2401 %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0 2402 %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8> 2403 %tmp4 = shufflevector <16 x i8> %tmp3, <16 x i8> poison, <16 x i32> zeroinitializer 2404 ret <16 x i8> %tmp4 2405} 2406 2407define <16 x i8> @insert_dup_elt1_mem_v16i8_i32(ptr %ptr) { 2408; SSE2-LABEL: insert_dup_elt1_mem_v16i8_i32: 2409; SSE2: # %bb.0: 2410; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2411; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2412; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7] 2413; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 2414; SSE2-NEXT: retq 2415; 2416; SSSE3-LABEL: insert_dup_elt1_mem_v16i8_i32: 2417; SSSE3: # %bb.0: 2418; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2419; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] 2420; SSSE3-NEXT: retq 2421; 2422; SSE41-LABEL: insert_dup_elt1_mem_v16i8_i32: 2423; SSE41: # %bb.0: 2424; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2425; SSE41-NEXT: pshufb {{.*#+}} xmm0 = 
xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] 2426; SSE41-NEXT: retq 2427; 2428; AVX1-LABEL: insert_dup_elt1_mem_v16i8_i32: 2429; AVX1: # %bb.0: 2430; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2431; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] 2432; AVX1-NEXT: retq 2433; 2434; AVX2OR512VL-LABEL: insert_dup_elt1_mem_v16i8_i32: 2435; AVX2OR512VL: # %bb.0: 2436; AVX2OR512VL-NEXT: vpbroadcastb 1(%rdi), %xmm0 2437; AVX2OR512VL-NEXT: retq 2438; 2439; XOPAVX1-LABEL: insert_dup_elt1_mem_v16i8_i32: 2440; XOPAVX1: # %bb.0: 2441; XOPAVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2442; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] 2443; XOPAVX1-NEXT: retq 2444; 2445; XOPAVX2-LABEL: insert_dup_elt1_mem_v16i8_i32: 2446; XOPAVX2: # %bb.0: 2447; XOPAVX2-NEXT: vpbroadcastb 1(%rdi), %xmm0 2448; XOPAVX2-NEXT: retq 2449 %tmp = load i32, ptr %ptr, align 4 2450 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0 2451 %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8> 2452 %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> poison, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 2453 ret <16 x i8> %tmp3 2454} 2455 2456define <16 x i8> @insert_dup_elt2_mem_v16i8_i32(ptr %ptr) { 2457; SSE2-LABEL: insert_dup_elt2_mem_v16i8_i32: 2458; SSE2: # %bb.0: 2459; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2460; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2461; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,2,2,2,4,5,6,7] 2462; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 2463; SSE2-NEXT: retq 2464; 2465; SSSE3-LABEL: insert_dup_elt2_mem_v16i8_i32: 2466; SSSE3: # %bb.0: 2467; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2468; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2] 2469; SSSE3-NEXT: retq 2470; 2471; SSE41-LABEL: insert_dup_elt2_mem_v16i8_i32: 2472; SSE41: # %bb.0: 2473; 
SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2474; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2] 2475; SSE41-NEXT: retq 2476; 2477; AVX1-LABEL: insert_dup_elt2_mem_v16i8_i32: 2478; AVX1: # %bb.0: 2479; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2480; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2] 2481; AVX1-NEXT: retq 2482; 2483; AVX2OR512VL-LABEL: insert_dup_elt2_mem_v16i8_i32: 2484; AVX2OR512VL: # %bb.0: 2485; AVX2OR512VL-NEXT: vpbroadcastb 2(%rdi), %xmm0 2486; AVX2OR512VL-NEXT: retq 2487; 2488; XOPAVX1-LABEL: insert_dup_elt2_mem_v16i8_i32: 2489; XOPAVX1: # %bb.0: 2490; XOPAVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2491; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2] 2492; XOPAVX1-NEXT: retq 2493; 2494; XOPAVX2-LABEL: insert_dup_elt2_mem_v16i8_i32: 2495; XOPAVX2: # %bb.0: 2496; XOPAVX2-NEXT: vpbroadcastb 2(%rdi), %xmm0 2497; XOPAVX2-NEXT: retq 2498 %tmp = load i32, ptr %ptr, align 4 2499 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0 2500 %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8> 2501 %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> poison, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2> 2502 ret <16 x i8> %tmp3 2503} 2504 2505define <16 x i8> @insert_dup_elt1_mem_v16i8_sext_i8(ptr %ptr) { 2506; SSE2-LABEL: insert_dup_elt1_mem_v16i8_sext_i8: 2507; SSE2: # %bb.0: 2508; SSE2-NEXT: movsbl (%rdi), %eax 2509; SSE2-NEXT: movd %eax, %xmm0 2510; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2511; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7] 2512; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 2513; SSE2-NEXT: retq 2514; 2515; SSSE3-LABEL: insert_dup_elt1_mem_v16i8_sext_i8: 2516; SSSE3: # %bb.0: 2517; SSSE3-NEXT: movsbl (%rdi), %eax 2518; SSSE3-NEXT: movd %eax, %xmm0 2519; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = 
xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] 2520; SSSE3-NEXT: retq 2521; 2522; SSE41-LABEL: insert_dup_elt1_mem_v16i8_sext_i8: 2523; SSE41: # %bb.0: 2524; SSE41-NEXT: movsbl (%rdi), %eax 2525; SSE41-NEXT: movd %eax, %xmm0 2526; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] 2527; SSE41-NEXT: retq 2528; 2529; AVX1-LABEL: insert_dup_elt1_mem_v16i8_sext_i8: 2530; AVX1: # %bb.0: 2531; AVX1-NEXT: movsbl (%rdi), %eax 2532; AVX1-NEXT: vmovd %eax, %xmm0 2533; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] 2534; AVX1-NEXT: retq 2535; 2536; AVX2-LABEL: insert_dup_elt1_mem_v16i8_sext_i8: 2537; AVX2: # %bb.0: 2538; AVX2-NEXT: movsbl (%rdi), %eax 2539; AVX2-NEXT: shrl $8, %eax 2540; AVX2-NEXT: vmovd %eax, %xmm0 2541; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0 2542; AVX2-NEXT: retq 2543; 2544; AVX512VL-LABEL: insert_dup_elt1_mem_v16i8_sext_i8: 2545; AVX512VL: # %bb.0: 2546; AVX512VL-NEXT: movsbl (%rdi), %eax 2547; AVX512VL-NEXT: shrl $8, %eax 2548; AVX512VL-NEXT: vpbroadcastb %eax, %xmm0 2549; AVX512VL-NEXT: retq 2550; 2551; XOPAVX1-LABEL: insert_dup_elt1_mem_v16i8_sext_i8: 2552; XOPAVX1: # %bb.0: 2553; XOPAVX1-NEXT: movsbl (%rdi), %eax 2554; XOPAVX1-NEXT: vmovd %eax, %xmm0 2555; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] 2556; XOPAVX1-NEXT: retq 2557; 2558; XOPAVX2-LABEL: insert_dup_elt1_mem_v16i8_sext_i8: 2559; XOPAVX2: # %bb.0: 2560; XOPAVX2-NEXT: movsbl (%rdi), %eax 2561; XOPAVX2-NEXT: shrl $8, %eax 2562; XOPAVX2-NEXT: vmovd %eax, %xmm0 2563; XOPAVX2-NEXT: vpbroadcastb %xmm0, %xmm0 2564; XOPAVX2-NEXT: retq 2565 %tmp = load i8, ptr %ptr, align 1 2566 %tmp1 = sext i8 %tmp to i32 2567 %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0 2568 %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8> 2569 %tmp4 = shufflevector <16 x i8> %tmp3, <16 x i8> poison, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 2570 ret <16 x i8> %tmp4 
}

; Splat of byte 2 of a sign-extended i8 load.
; The loaded byte is sign-extended to i32, placed in element 0 of an otherwise
; zero <4 x i32>, reinterpreted as <16 x i8>, and byte index 2 is broadcast to
; all 16 lanes. The expected code for the broadcast-capable targets below
; extracts that byte with a scalar right shift by 16 before broadcasting.
define <16 x i8> @insert_dup_elt2_mem_v16i8_sext_i8(ptr %ptr) {
; SSE2-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
; SSE2: # %bb.0:
; SSE2-NEXT: movsbl (%rdi), %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,2,2,2,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movsbl (%rdi), %eax
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
; SSE41: # %bb.0:
; SSE41-NEXT: movsbl (%rdi), %eax
; SSE41-NEXT: movd %eax, %xmm0
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; SSE41-NEXT: retq
;
; AVX1-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
; AVX1: # %bb.0:
; AVX1-NEXT: movsbl (%rdi), %eax
; AVX1-NEXT: vmovd %eax, %xmm0
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
; AVX2: # %bb.0:
; AVX2-NEXT: movsbl (%rdi), %eax
; AVX2-NEXT: shrl $16, %eax
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: movsbl (%rdi), %eax
; AVX512VL-NEXT: shrl $16, %eax
; AVX512VL-NEXT: vpbroadcastb %eax, %xmm0
; AVX512VL-NEXT: retq
;
; XOPAVX1-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: movsbl (%rdi), %eax
; XOPAVX1-NEXT: vmovd %eax, %xmm0
; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: movsbl (%rdi), %eax
; XOPAVX2-NEXT: shrl $16, %eax
; XOPAVX2-NEXT: vmovd %eax, %xmm0
; XOPAVX2-NEXT: vpbroadcastb %xmm0, %xmm0
; XOPAVX2-NEXT: retq
  %tmp = load i8, ptr %ptr, align 1
  ; Widen the byte so that bytes 1..3 of the i32 carry copies of its sign bit.
  %tmp1 = sext i8 %tmp to i32
  %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
  ; View the same 128 bits as sixteen bytes so a single byte can be selected.
  %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8>
  ; Every result lane reads source byte 2.
  %tmp4 = shufflevector <16 x i8> %tmp3, <16 x i8> poison, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <16 x i8> %tmp4
}

; Shuffle of a vector built from two scalar byte loads plus one constant zero.
; Element 0 comes from %a, element 1 from %b, element 3 is the constant 0 and
; all remaining elements are poison. The mask mostly replicates element 1,
; reads the zero element once (result lane 7) and ends with three copies of
; element 0.
; NOTE(review): the name suggests this reproduces bug report PR31364; the
; report itself is not visible from this file.
define <16 x i8> @PR31364(ptr nocapture readonly %a, ptr nocapture readonly %b) {
; SSE2-LABEL: PR31364:
; SSE2: # %bb.0:
; SSE2-NEXT: movzbl (%rdi), %eax
; SSE2-NEXT: movzbl (%rsi), %ecx
; SSE2-NEXT: shll $8, %ecx
; SSE2-NEXT: orl %eax, %ecx
; SSE2-NEXT: movd %ecx, %xmm1
; SSE2-NEXT: pxor %xmm0, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[1,1,1,3,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[1,1,1,1,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,4,4,4]
; SSE2-NEXT: packuswb %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: PR31364:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movzbl (%rdi), %eax
; SSSE3-NEXT: movzbl (%rsi), %ecx
; SSSE3-NEXT: shll $8, %ecx
; SSSE3-NEXT: orl %eax, %ecx
; SSSE3-NEXT: movd %ecx, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1],zero,xmm0[1,1,1,1,1,0,0,0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: PR31364:
; SSE41: # %bb.0:
; SSE41-NEXT: movzbl (%rdi), %eax
; SSE41-NEXT: movd %eax, %xmm0
; SSE41-NEXT: pinsrb $1, (%rsi), %xmm0
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1],zero,xmm0[1,1,1,1,1,0,0,0]
; SSE41-NEXT: retq
;
; AVX-LABEL: PR31364:
; AVX: # %bb.0:
; AVX-NEXT: movzbl (%rdi), %eax
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: vpinsrb $1, (%rsi), %xmm0, %xmm0
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1],zero,xmm0[1,1,1,1,1,0,0,0]
; AVX-NEXT: retq
  %v0 = load i8, ptr %a, align 1
  ; Base vector is poison everywhere except element 3, which is a known zero.
  %vecins = insertelement <16 x i8> <i8 poison, i8 poison, i8 poison, i8 0, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, i8 %v0, i32 0
  %v1 = load i8, ptr %b, align 1
  %vecins2 = insertelement <16 x i8> %vecins, i8 %v1, i32 1
  ; Mask index 3 selects the constant zero; it shows up as "zero" in the
  ; expected byte-shuffle operands above.
  %result = shufflevector <16 x i8> %vecins2, <16 x i8> poison, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 3, i32 1, i32 1, i32 1, i32 1, i32 1, i32 0, i32 0, i32 0>
  ret <16 x i8> %result
}

; Interleave of two splatted byte loads.
; Each of %x and %y is loaded as a single byte and replicated into the low
; eight lanes of its own vector (the high eight lanes are left poison). The
; final shuffle alternates lanes 0..7 of the first vector with lanes 0..7 of
; the second.
; NOTE(review): the name suggests this reproduces bug report PR31301; the
; report itself is not visible from this file.
define <16 x i8> @PR31301(ptr nocapture readonly %x, ptr nocapture readonly %y) {
; SSE2-LABEL: PR31301:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movzbl (%rdi), %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT: movzbl (%rsi), %eax
; SSE2-NEXT: movd %eax, %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: PR31301:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movzbl (%rdi), %eax
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: pshufb %xmm1, %xmm0
; SSSE3-NEXT: movzbl (%rsi), %eax
; SSSE3-NEXT: movd %eax, %xmm2
; SSSE3-NEXT: pshufb %xmm1, %xmm2
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: PR31301:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: movzbl (%rdi), %eax
; SSE41-NEXT: movd %eax, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: pshufb %xmm1, %xmm0
; SSE41-NEXT: movzbl (%rsi), %eax
; SSE41-NEXT: movd %eax, %xmm2
; SSE41-NEXT: pshufb %xmm1, %xmm2
; SSE41-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: PR31301:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: movzbl (%rdi), %eax
; AVX1-NEXT: vmovd %eax, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: movzbl (%rsi), %eax
; AVX1-NEXT: vmovd %eax, %xmm2
; AVX1-NEXT: vpshufb %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT: retq
;
; AVX2OR512VL-LABEL: PR31301:
; AVX2OR512VL: # %bb.0: # %entry
; AVX2OR512VL-NEXT: vpbroadcastb (%rdi), %xmm0
; AVX2OR512VL-NEXT: vpbroadcastb (%rsi), %xmm1
; AVX2OR512VL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX2OR512VL-NEXT: retq
;
; XOPAVX1-LABEL: PR31301:
; XOPAVX1: # %bb.0: # %entry
; XOPAVX1-NEXT: movzbl (%rdi), %eax
; XOPAVX1-NEXT: vmovd %eax, %xmm0
; XOPAVX1-NEXT: movzbl (%rsi), %eax
; XOPAVX1-NEXT: vmovd %eax, %xmm1
; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[0],xmm1[0],xmm0[0],xmm1[0],xmm0[0],xmm1[0],xmm0[0],xmm1[0],xmm0[0],xmm1[0],xmm0[0],xmm1[0],xmm0[0],xmm1[0]
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: PR31301:
; XOPAVX2: # %bb.0: # %entry
; XOPAVX2-NEXT: vpbroadcastb (%rdi), %xmm0
; XOPAVX2-NEXT: vpbroadcastb (%rsi), %xmm1
; XOPAVX2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; XOPAVX2-NEXT: retq
entry:
  %0 = load i8, ptr %x, align 1
  %1 = insertelement <16 x i8> poison, i8 %0, i32 0
  ; Replicate the byte from %x into lanes 0..7; lanes 8..15 stay poison.
  %lane = shufflevector <16 x i8> %1, <16 x i8> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  %2 = load i8, ptr %y, align 1
  %3 = insertelement <16 x i8> poison, i8 %2, i32 0
  ; Same replication for the byte from %y.
  %lane3 = shufflevector <16 x i8> %3, <16 x i8> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  ; Indices 0..7 read %lane and 16..23 read %lane3, so the result alternates
  ; the two bytes and only touches the defined low halves.
  %vzip.i = shufflevector <16 x i8> %lane, <16 x i8> %lane3, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
  ret <16 x i8> %vzip.i
}

; Chain of shuffles that reduces to one byte permutation with zeroing.
; Bytes 8..15 of %i are interleaved with zero bytes (widening each to a
; 16-bit element), then three <8 x i16> shuffles swap the halves and reverse
; each half. Composed, the eight 16-bit elements come out fully reversed,
; i.e. source bytes 15 down to 8, each followed by a zero byte.
; NOTE(review): the name suggests this reproduces bug report PR104482; the
; report itself is not visible from this file.
define <8 x i16> @PR104482(<16 x i8> %i) {
; SSE2-LABEL: PR104482:
; SSE2: # %bb.0:
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; SSE2-NEXT: packuswb %xmm0, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: PR104482:
; SSSE3: # %bb.0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[15],zero,xmm0[14],zero,xmm0[13],zero,xmm0[12],zero,xmm0[11],zero,xmm0[10],zero,xmm0[9],zero,xmm0[8],zero
; SSSE3-NEXT: retq
;
; SSE41-LABEL: PR104482:
; SSE41: # %bb.0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[15],zero,xmm0[14],zero,xmm0[13],zero,xmm0[12],zero,xmm0[11],zero,xmm0[10],zero,xmm0[9],zero,xmm0[8],zero
; SSE41-NEXT: retq
;
; AVX-LABEL: PR104482:
; AVX: # %bb.0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15],zero,xmm0[14],zero,xmm0[13],zero,xmm0[12],zero,xmm0[11],zero,xmm0[10],zero,xmm0[9],zero,xmm0[8],zero
; AVX-NEXT: retq
  ; Indices 8..15 read %i; indices 24..31 read elements 8..15 of the constant
  ; operand, which are all zero (its poison elements 0..7 are never selected).
  %i7 = shufflevector <16 x i8> %i, <16 x i8> <i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
  %1 = bitcast <16 x i8> %i7 to <8 x i16>
  ; Swap the low and high four 16-bit elements.
  %i10 = shufflevector <8 x i16> %1, <8 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
  ; Reverse the low four elements.
  %i11 = shufflevector <8 x i16> %i10, <8 x i16> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
  ; Reverse the high four elements.
  %i12 = shufflevector <8 x i16> %i11, <8 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 5, i32 4>
  ret <8 x i16> %i12
}
