1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX1 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX2 4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefix=XOPAVX1 5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefix=XOPAVX2 6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=AVX512,AVX512DQ 7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW 8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=AVX512VL,AVX512DQVL 9; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512VL,AVX512BWVL 10; 11; 32-bit runs to make sure we do reasonable things for i64 shifts. 12; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X86-AVX1 13; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X86-AVX2 14 15; 16; Variable Shifts 17; 18 19define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind { 20; AVX1-LABEL: var_shift_v4i64: 21; AVX1: # %bb.0: 22; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 23; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 24; AVX1-NEXT: vpsllq %xmm2, %xmm3, %xmm4 25; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3] 26; AVX1-NEXT: vpsllq %xmm2, %xmm3, %xmm2 27; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm4[0,1,2,3],xmm2[4,5,6,7] 28; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm3 29; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 30; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0 31; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm3[0,1,2,3],xmm0[4,5,6,7] 32; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 33; AVX1-NEXT: retq 34; 35; AVX2-LABEL: var_shift_v4i64: 36; AVX2: # %bb.0: 37; AVX2-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 38; AVX2-NEXT: retq 39; 40; XOPAVX1-LABEL: var_shift_v4i64: 41; XOPAVX1: # %bb.0: 42; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 43; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 44; XOPAVX1-NEXT: vpshlq %xmm2, %xmm3, %xmm2 45; XOPAVX1-NEXT: vpshlq %xmm1, %xmm0, %xmm0 46; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 47; XOPAVX1-NEXT: retq 48; 49; XOPAVX2-LABEL: var_shift_v4i64: 50; XOPAVX2: # %bb.0: 51; XOPAVX2-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 52; XOPAVX2-NEXT: retq 53; 54; AVX512-LABEL: var_shift_v4i64: 55; AVX512: # %bb.0: 56; AVX512-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 57; AVX512-NEXT: retq 58; 59; AVX512VL-LABEL: var_shift_v4i64: 60; AVX512VL: # %bb.0: 61; AVX512VL-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 62; AVX512VL-NEXT: retq 63; 64; X86-AVX1-LABEL: var_shift_v4i64: 65; X86-AVX1: # %bb.0: 66; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 67; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 68; X86-AVX1-NEXT: vpsllq %xmm2, %xmm3, %xmm4 69; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3] 70; X86-AVX1-NEXT: vpsllq %xmm2, %xmm3, %xmm2 71; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm4[0,1,2,3],xmm2[4,5,6,7] 72; X86-AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm3 73; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 74; X86-AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0 75; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm3[0,1,2,3],xmm0[4,5,6,7] 76; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 77; X86-AVX1-NEXT: retl 78; 79; X86-AVX2-LABEL: var_shift_v4i64: 80; X86-AVX2: # %bb.0: 81; X86-AVX2-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 82; X86-AVX2-NEXT: retl 83 %shift = shl <4 x i64> %a, %b 84 ret <4 x i64> %shift 85} 86 87define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind { 88; AVX1-LABEL: var_shift_v8i32: 89; AVX1: # %bb.0: 90; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 91; AVX1-NEXT: vpslld $23, %xmm2, %xmm2 92; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216] 93; AVX1-NEXT: vpaddd %xmm3, %xmm2, %xmm2 94; AVX1-NEXT: vcvttps2dq %xmm2, %xmm2 95; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 96; AVX1-NEXT: vpmulld %xmm2, %xmm4, %xmm2 97; AVX1-NEXT: vpslld $23, %xmm1, %xmm1 98; AVX1-NEXT: vpaddd %xmm3, %xmm1, %xmm1 99; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1 100; AVX1-NEXT: vpmulld %xmm1, %xmm0, %xmm0 101; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 102; AVX1-NEXT: retq 103; 104; AVX2-LABEL: var_shift_v8i32: 105; AVX2: # %bb.0: 106; AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 107; AVX2-NEXT: retq 108; 109; XOPAVX1-LABEL: var_shift_v8i32: 110; XOPAVX1: # %bb.0: 111; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 112; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 113; XOPAVX1-NEXT: vpshld %xmm2, %xmm3, %xmm2 114; XOPAVX1-NEXT: vpshld %xmm1, %xmm0, %xmm0 115; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 116; XOPAVX1-NEXT: retq 117; 118; XOPAVX2-LABEL: var_shift_v8i32: 119; XOPAVX2: # %bb.0: 120; XOPAVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 121; XOPAVX2-NEXT: retq 122; 123; AVX512-LABEL: var_shift_v8i32: 124; AVX512: # %bb.0: 125; AVX512-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 126; AVX512-NEXT: retq 127; 128; AVX512VL-LABEL: var_shift_v8i32: 129; AVX512VL: # %bb.0: 130; AVX512VL-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 131; AVX512VL-NEXT: retq 132; 133; X86-AVX1-LABEL: var_shift_v8i32: 134; X86-AVX1: # %bb.0: 135; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 136; X86-AVX1-NEXT: vpslld $23, %xmm2, %xmm2 137; X86-AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216] 138; X86-AVX1-NEXT: vpaddd %xmm3, %xmm2, %xmm2 139; X86-AVX1-NEXT: vcvttps2dq %xmm2, %xmm2 140; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 141; X86-AVX1-NEXT: vpmulld %xmm2, %xmm4, %xmm2 142; X86-AVX1-NEXT: vpslld $23, %xmm1, %xmm1 143; X86-AVX1-NEXT: vpaddd %xmm3, %xmm1, %xmm1 144; X86-AVX1-NEXT: vcvttps2dq %xmm1, %xmm1 145; X86-AVX1-NEXT: vpmulld %xmm1, %xmm0, %xmm0 146; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 147; X86-AVX1-NEXT: retl 148; 149; X86-AVX2-LABEL: var_shift_v8i32: 150; X86-AVX2: # %bb.0: 151; X86-AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 152; X86-AVX2-NEXT: retl 153 %shift = shl <8 x i32> %a, %b 154 ret <8 x i32> %shift 155} 156 157define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind { 158; AVX1-LABEL: var_shift_v16i16: 159; AVX1: # %bb.0: 160; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 161; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm2[4,4,5,5,6,6,7,7] 162; AVX1-NEXT: vpslld $23, %xmm3, %xmm3 163; AVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [1065353216,1065353216,1065353216,1065353216] 164; AVX1-NEXT: vpaddd %xmm4, %xmm3, %xmm3 165; AVX1-NEXT: vcvttps2dq %xmm3, %xmm3 166; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero 167; AVX1-NEXT: vpslld $23, %xmm2, %xmm2 168; AVX1-NEXT: vpaddd %xmm4, %xmm2, %xmm2 169; AVX1-NEXT: vcvttps2dq %xmm2, %xmm2 170; AVX1-NEXT: vpackusdw %xmm3, %xmm2, %xmm2 171; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 172; AVX1-NEXT: vpmullw %xmm2, %xmm3, %xmm2 173; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm1[4,4,5,5,6,6,7,7] 174; AVX1-NEXT: vpslld $23, %xmm3, %xmm3 175; AVX1-NEXT: vpaddd %xmm4, %xmm3, %xmm3 176; AVX1-NEXT: vcvttps2dq %xmm3, %xmm3 177; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero 178; AVX1-NEXT: vpslld $23, %xmm1, %xmm1 179; AVX1-NEXT: vpaddd %xmm4, %xmm1, %xmm1 180; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1 181; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1 182; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 183; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 184; AVX1-NEXT: retq 185; 186; AVX2-LABEL: var_shift_v16i16: 187; AVX2: # %bb.0: 188; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 189; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15] 190; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15] 191; AVX2-NEXT: vpsllvd %ymm3, %ymm4, %ymm3 192; AVX2-NEXT: vpsrld $16, %ymm3, %ymm3 193; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11] 194; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11] 195; AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 196; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0 197; AVX2-NEXT: vpackusdw %ymm3, %ymm0, %ymm0 198; AVX2-NEXT: retq 199; 200; XOPAVX1-LABEL: var_shift_v16i16: 201; XOPAVX1: # %bb.0: 202; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 203; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 204; XOPAVX1-NEXT: vpshlw %xmm2, %xmm3, %xmm2 205; XOPAVX1-NEXT: vpshlw %xmm1, %xmm0, %xmm0 206; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 207; XOPAVX1-NEXT: retq 208; 209; XOPAVX2-LABEL: var_shift_v16i16: 210; XOPAVX2: # %bb.0: 211; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 212; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm3 213; XOPAVX2-NEXT: vpshlw %xmm2, %xmm3, %xmm2 214; XOPAVX2-NEXT: vpshlw %xmm1, %xmm0, %xmm0 215; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 216; XOPAVX2-NEXT: retq 217; 218; AVX512DQ-LABEL: var_shift_v16i16: 219; AVX512DQ: # %bb.0: 220; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero 221; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero 222; AVX512DQ-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 223; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0 224; AVX512DQ-NEXT: retq 225; 226; AVX512BW-LABEL: var_shift_v16i16: 227; AVX512BW: # %bb.0: 228; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 229; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 230; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 231; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 232; AVX512BW-NEXT: retq 233; 234; AVX512DQVL-LABEL: var_shift_v16i16: 235; AVX512DQVL: # %bb.0: 236; AVX512DQVL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero 237; AVX512DQVL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero 238; AVX512DQVL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 239; AVX512DQVL-NEXT: vpmovdw %zmm0, %ymm0 240; AVX512DQVL-NEXT: retq 241; 242; AVX512BWVL-LABEL: var_shift_v16i16: 243; AVX512BWVL: # %bb.0: 244; AVX512BWVL-NEXT: vpsllvw %ymm1, %ymm0, %ymm0 245; AVX512BWVL-NEXT: retq 246; 247; X86-AVX1-LABEL: var_shift_v16i16: 248; X86-AVX1: # %bb.0: 249; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 250; X86-AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm3[4,4,5,5,6,6,7,7] 251; X86-AVX1-NEXT: vpslld $23, %xmm2, %xmm4 252; X86-AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [1065353216,1065353216,1065353216,1065353216] 253; X86-AVX1-NEXT: vpaddd %xmm2, %xmm4, %xmm4 254; X86-AVX1-NEXT: vcvttps2dq %xmm4, %xmm4 255; X86-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero 256; X86-AVX1-NEXT: vpslld $23, %xmm3, %xmm3 257; X86-AVX1-NEXT: vpaddd %xmm2, %xmm3, %xmm3 258; X86-AVX1-NEXT: vcvttps2dq %xmm3, %xmm3 259; X86-AVX1-NEXT: vpackusdw %xmm4, %xmm3, %xmm3 260; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 261; X86-AVX1-NEXT: vpmullw %xmm3, %xmm4, %xmm3 262; X86-AVX1-NEXT: vpunpckhwd {{.*#+}} xmm4 = xmm1[4,4,5,5,6,6,7,7] 263; X86-AVX1-NEXT: vpslld $23, %xmm4, %xmm4 264; X86-AVX1-NEXT: vpaddd %xmm2, %xmm4, %xmm4 265; X86-AVX1-NEXT: vcvttps2dq %xmm4, %xmm4 266; X86-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero 267; X86-AVX1-NEXT: vpslld $23, %xmm1, %xmm1 268; X86-AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1 269; X86-AVX1-NEXT: vcvttps2dq %xmm1, %xmm1 270; X86-AVX1-NEXT: vpackusdw %xmm4, %xmm1, %xmm1 271; X86-AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 272; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 273; X86-AVX1-NEXT: retl 274; 275; X86-AVX2-LABEL: var_shift_v16i16: 276; X86-AVX2: # %bb.0: 277; X86-AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 278; X86-AVX2-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15] 279; X86-AVX2-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15] 280; X86-AVX2-NEXT: vpsllvd %ymm3, %ymm4, %ymm3 281; X86-AVX2-NEXT: vpsrld $16, %ymm3, %ymm3 282; X86-AVX2-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11] 283; X86-AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11] 284; X86-AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 285; X86-AVX2-NEXT: vpsrld $16, %ymm0, %ymm0 286; X86-AVX2-NEXT: vpackusdw %ymm3, %ymm0, %ymm0 287; X86-AVX2-NEXT: retl 288 %shift = shl <16 x i16> %a, %b 289 ret <16 x i16> %shift 290} 291 292define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind { 293; AVX1-LABEL: var_shift_v32i8: 294; AVX1: # %bb.0: 295; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 296; AVX1-NEXT: vpsllw $4, %xmm2, %xmm3 297; AVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] 298; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3 299; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5 300; AVX1-NEXT: vpsllw $5, %xmm5, %xmm5 301; AVX1-NEXT: vpblendvb %xmm5, %xmm3, %xmm2, %xmm2 302; AVX1-NEXT: vpsllw $2, %xmm2, %xmm3 303; AVX1-NEXT: vbroadcastss {{.*#+}} xmm6 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252] 304; AVX1-NEXT: vpand %xmm6, %xmm3, %xmm3 305; AVX1-NEXT: vpaddb %xmm5, %xmm5, %xmm5 306; AVX1-NEXT: vpblendvb %xmm5, %xmm3, %xmm2, %xmm2 307; AVX1-NEXT: vpaddb %xmm2, %xmm2, %xmm3 308; AVX1-NEXT: vpaddb %xmm5, %xmm5, %xmm5 309; AVX1-NEXT: vpblendvb %xmm5, %xmm3, %xmm2, %xmm2 310; AVX1-NEXT: vpsllw $4, %xmm0, %xmm3 311; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3 312; AVX1-NEXT: vpsllw $5, %xmm1, %xmm1 313; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0 314; AVX1-NEXT: vpsllw $2, %xmm0, %xmm3 315; AVX1-NEXT: vpand %xmm6, %xmm3, %xmm3 316; AVX1-NEXT: vpaddb %xmm1, %xmm1, %xmm1 317; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0 318; AVX1-NEXT: vpaddb %xmm0, %xmm0, %xmm3 319; AVX1-NEXT: vpaddb %xmm1, %xmm1, %xmm1 320; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0 321; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 322; AVX1-NEXT: retq 323; 324; AVX2-LABEL: var_shift_v32i8: 325; AVX2: # %bb.0: 326; AVX2-NEXT: vpsllw $5, %ymm1, %ymm1 327; AVX2-NEXT: vpsllw $4, %ymm0, %ymm2 328; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 329; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 330; AVX2-NEXT: vpsllw $2, %ymm0, %ymm2 331; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 332; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1 333; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 334; AVX2-NEXT: vpaddb %ymm0, %ymm0, %ymm2 335; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1 336; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 337; AVX2-NEXT: retq 338; 339; XOPAVX1-LABEL: var_shift_v32i8: 340; XOPAVX1: # %bb.0: 341; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 342; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 343; XOPAVX1-NEXT: vpshlb %xmm2, %xmm3, %xmm2 344; XOPAVX1-NEXT: vpshlb %xmm1, %xmm0, %xmm0 345; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 346; XOPAVX1-NEXT: retq 347; 348; XOPAVX2-LABEL: var_shift_v32i8: 349; XOPAVX2: # %bb.0: 350; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 351; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm3 352; XOPAVX2-NEXT: vpshlb %xmm2, %xmm3, %xmm2 353; XOPAVX2-NEXT: vpshlb %xmm1, %xmm0, %xmm0 354; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 355; XOPAVX2-NEXT: retq 356; 357; AVX512DQ-LABEL: var_shift_v32i8: 358; AVX512DQ: # %bb.0: 359; AVX512DQ-NEXT: vpsllw $5, %ymm1, %ymm1 360; AVX512DQ-NEXT: vpsllw $4, %ymm0, %ymm2 361; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 362; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 363; AVX512DQ-NEXT: vpsllw $2, %ymm0, %ymm2 364; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 365; AVX512DQ-NEXT: vpaddb %ymm1, %ymm1, %ymm1 366; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 367; AVX512DQ-NEXT: vpaddb %ymm0, %ymm0, %ymm2 368; AVX512DQ-NEXT: vpaddb %ymm1, %ymm1, %ymm1 369; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 370; AVX512DQ-NEXT: retq 371; 372; AVX512BW-LABEL: var_shift_v32i8: 373; AVX512BW: # %bb.0: 374; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero 375; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 376; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 377; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 378; AVX512BW-NEXT: retq 379; 380; AVX512DQVL-LABEL: var_shift_v32i8: 381; AVX512DQVL: # %bb.0: 382; AVX512DQVL-NEXT: vpsllw $5, %ymm1, %ymm1 383; AVX512DQVL-NEXT: vpsllw $4, %ymm0, %ymm2 384; AVX512DQVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm2 385; AVX512DQVL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 386; AVX512DQVL-NEXT: vpsllw $2, %ymm0, %ymm2 387; AVX512DQVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm2 388; AVX512DQVL-NEXT: vpaddb %ymm1, %ymm1, %ymm1 389; AVX512DQVL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 390; AVX512DQVL-NEXT: vpaddb %ymm0, %ymm0, %ymm2 391; AVX512DQVL-NEXT: vpaddb %ymm1, %ymm1, %ymm1 392; AVX512DQVL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 393; AVX512DQVL-NEXT: retq 394; 395; AVX512BWVL-LABEL: var_shift_v32i8: 396; AVX512BWVL: # %bb.0: 397; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero 398; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 399; AVX512BWVL-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 400; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0 401; AVX512BWVL-NEXT: retq 402; 403; X86-AVX1-LABEL: var_shift_v32i8: 404; X86-AVX1: # %bb.0: 405; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 406; X86-AVX1-NEXT: vpsllw $4, %xmm2, %xmm3 407; X86-AVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] 408; X86-AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3 409; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5 410; X86-AVX1-NEXT: vpsllw $5, %xmm5, %xmm5 411; X86-AVX1-NEXT: vpblendvb %xmm5, %xmm3, %xmm2, %xmm2 412; X86-AVX1-NEXT: vpsllw $2, %xmm2, %xmm3 413; X86-AVX1-NEXT: vbroadcastss {{.*#+}} xmm6 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252] 414; X86-AVX1-NEXT: vpand %xmm6, %xmm3, %xmm3 415; X86-AVX1-NEXT: vpaddb %xmm5, %xmm5, %xmm5 416; X86-AVX1-NEXT: vpblendvb %xmm5, %xmm3, %xmm2, %xmm2 417; X86-AVX1-NEXT: vpaddb %xmm2, %xmm2, %xmm3 418; X86-AVX1-NEXT: vpaddb %xmm5, %xmm5, %xmm5 419; X86-AVX1-NEXT: vpblendvb %xmm5, %xmm3, %xmm2, %xmm2 420; X86-AVX1-NEXT: vpsllw $4, %xmm0, %xmm3 421; X86-AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3 422; X86-AVX1-NEXT: vpsllw $5, %xmm1, %xmm1 423; X86-AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0 424; X86-AVX1-NEXT: vpsllw $2, %xmm0, %xmm3 425; X86-AVX1-NEXT: vpand %xmm6, %xmm3, %xmm3 426; X86-AVX1-NEXT: vpaddb %xmm1, %xmm1, %xmm1 427; X86-AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0 428; X86-AVX1-NEXT: vpaddb %xmm0, %xmm0, %xmm3 429; X86-AVX1-NEXT: vpaddb %xmm1, %xmm1, %xmm1 430; X86-AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0 431; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 432; X86-AVX1-NEXT: retl 433; 434; X86-AVX2-LABEL: var_shift_v32i8: 435; X86-AVX2: # %bb.0: 436; X86-AVX2-NEXT: vpsllw $5, %ymm1, %ymm1 437; X86-AVX2-NEXT: vpsllw $4, %ymm0, %ymm2 438; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm2, %ymm2 439; X86-AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 440; X86-AVX2-NEXT: vpsllw $2, %ymm0, %ymm2 441; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm2, %ymm2 442; X86-AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1 443; X86-AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 444; X86-AVX2-NEXT: vpaddb %ymm0, %ymm0, %ymm2 445; X86-AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1 446; X86-AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 447; X86-AVX2-NEXT: retl 448 %shift = shl <32 x i8> %a, %b 449 ret <32 x i8> %shift 450} 451 452; 453; Uniform Variable Shifts 454; 455 456define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind { 457; AVX1-LABEL: splatvar_shift_v4i64: 458; AVX1: # %bb.0: 459; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 460; AVX1-NEXT: vpsllq %xmm1, %xmm2, %xmm2 461; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0 462; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 463; AVX1-NEXT: retq 464; 465; AVX2-LABEL: splatvar_shift_v4i64: 466; AVX2: # %bb.0: 467; AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0 468; AVX2-NEXT: retq 469; 470; XOPAVX1-LABEL: splatvar_shift_v4i64: 471; XOPAVX1: # %bb.0: 472; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 473; XOPAVX1-NEXT: vpsllq %xmm1, %xmm2, %xmm2 474; XOPAVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0 475; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 476; XOPAVX1-NEXT: retq 477; 478; XOPAVX2-LABEL: splatvar_shift_v4i64: 479; XOPAVX2: # %bb.0: 480; XOPAVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0 481; XOPAVX2-NEXT: retq 482; 483; AVX512-LABEL: splatvar_shift_v4i64: 484; AVX512: # %bb.0: 485; AVX512-NEXT: vpsllq %xmm1, %ymm0, %ymm0 486; AVX512-NEXT: retq 487; 488; AVX512VL-LABEL: splatvar_shift_v4i64: 489; AVX512VL: # %bb.0: 490; AVX512VL-NEXT: vpsllq %xmm1, %ymm0, %ymm0 491; AVX512VL-NEXT: retq 492; 493; X86-AVX1-LABEL: splatvar_shift_v4i64: 494; X86-AVX1: # %bb.0: 495; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 496; X86-AVX1-NEXT: vpsllq %xmm1, %xmm2, %xmm2 497; X86-AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0 498; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 499; X86-AVX1-NEXT: retl 500; 501; X86-AVX2-LABEL: splatvar_shift_v4i64: 502; X86-AVX2: # %bb.0: 503; X86-AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0 504; X86-AVX2-NEXT: retl 505 %splat = shufflevector <4 x i64> %b, <4 x i64> poison, <4 x i32> zeroinitializer 506 %shift = shl <4 x i64> %a, %splat 507 ret <4 x i64> %shift 508} 509 510define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind { 511; AVX1-LABEL: splatvar_shift_v8i32: 512; AVX1: # %bb.0: 513; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 514; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero 515; AVX1-NEXT: vpslld %xmm1, %xmm2, %xmm2 516; AVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0 517; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 518; AVX1-NEXT: retq 519; 520; AVX2-LABEL: splatvar_shift_v8i32: 521; AVX2: # %bb.0: 522; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero 523; AVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0 524; AVX2-NEXT: retq 525; 526; XOPAVX1-LABEL: splatvar_shift_v8i32: 527; XOPAVX1: # %bb.0: 528; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 529; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero 530; XOPAVX1-NEXT: vpslld %xmm1, %xmm2, %xmm2 531; XOPAVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0 532; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 533; XOPAVX1-NEXT: retq 534; 535; XOPAVX2-LABEL: splatvar_shift_v8i32: 536; XOPAVX2: # %bb.0: 537; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero 538; XOPAVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0 539; XOPAVX2-NEXT: retq 540; 541; AVX512-LABEL: splatvar_shift_v8i32: 542; AVX512: # %bb.0: 543; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero 544; AVX512-NEXT: vpslld %xmm1, %ymm0, %ymm0 545; AVX512-NEXT: retq 546; 547; AVX512VL-LABEL: splatvar_shift_v8i32: 548; AVX512VL: # %bb.0: 549; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero 550; AVX512VL-NEXT: vpslld %xmm1, %ymm0, %ymm0 551; AVX512VL-NEXT: retq 552; 553; X86-AVX1-LABEL: splatvar_shift_v8i32: 554; X86-AVX1: # %bb.0: 555; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 556; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero 557; X86-AVX1-NEXT: vpslld %xmm1, %xmm2, %xmm2 558; X86-AVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0 559; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 560; X86-AVX1-NEXT: retl 561; 562; X86-AVX2-LABEL: splatvar_shift_v8i32: 563; X86-AVX2: # %bb.0: 564; X86-AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero 565; X86-AVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0 566; X86-AVX2-NEXT: retl 567 %splat = shufflevector <8 x i32> %b, <8 x i32> poison, <8 x i32> zeroinitializer 568 %shift = shl <8 x i32> %a, %splat 569 ret <8 x i32> %shift 570} 571 572define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind { 573; AVX1-LABEL: splatvar_shift_v16i16: 574; AVX1: # %bb.0: 575; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 576; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 577; AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2 578; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0 579; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 580; AVX1-NEXT: retq 581; 582; AVX2-LABEL: splatvar_shift_v16i16: 583; AVX2: # %bb.0: 584; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 585; AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0 586; AVX2-NEXT: retq 587; 588; XOPAVX1-LABEL: splatvar_shift_v16i16: 589; XOPAVX1: # %bb.0: 590; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 591; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 592; XOPAVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2 593; XOPAVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0 594; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 595; XOPAVX1-NEXT: retq 596; 597; XOPAVX2-LABEL: splatvar_shift_v16i16: 598; XOPAVX2: # %bb.0: 599; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 600; XOPAVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0 601; XOPAVX2-NEXT: retq 602; 603; AVX512-LABEL: splatvar_shift_v16i16: 604; AVX512: # %bb.0: 605; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 606; AVX512-NEXT: vpsllw %xmm1, %ymm0, %ymm0 607; AVX512-NEXT: retq 608; 609; AVX512VL-LABEL: splatvar_shift_v16i16: 610; AVX512VL: # %bb.0: 611; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 612; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0 613; AVX512VL-NEXT: retq 614; 615; X86-AVX1-LABEL: splatvar_shift_v16i16: 616; X86-AVX1: # %bb.0: 617; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 618; X86-AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 619; X86-AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2 620; X86-AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0 621; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 622; X86-AVX1-NEXT: retl 623; 624; X86-AVX2-LABEL: splatvar_shift_v16i16: 625; X86-AVX2: # %bb.0: 626; X86-AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 627; X86-AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0 628; X86-AVX2-NEXT: retl 629 %splat = shufflevector <16 x i16> %b, <16 x i16> poison, <16 x i32> zeroinitializer 630 %shift = shl <16 x i16> %a, %splat 631 ret <16 x i16> %shift 632} 633 634define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind { 635; AVX1-LABEL: splatvar_shift_v32i8: 636; AVX1: # %bb.0: 637; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 638; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero 639; AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2 640; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 641; AVX1-NEXT: vpsllw %xmm1, %xmm3, %xmm3 642; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 643; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm3 644; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 645; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0 646; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0 647; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 648; AVX1-NEXT: retq 649; 650; AVX2-LABEL: splatvar_shift_v32i8: 651; AVX2: # %bb.0: 652; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero 653; AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0 654; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 655; AVX2-NEXT: vpsllw %xmm1, %xmm2, %xmm1 656; AVX2-NEXT: vpbroadcastb %xmm1, %ymm1 657; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 658; AVX2-NEXT: retq 659; 660; XOPAVX1-LABEL: splatvar_shift_v32i8: 661; XOPAVX1: # %bb.0: 662; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 663; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 664; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 665; XOPAVX1-NEXT: vpshlb %xmm1, %xmm2, %xmm2 666; XOPAVX1-NEXT: vpshlb %xmm1, %xmm0, %xmm0 667; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 668; XOPAVX1-NEXT: retq 669; 670; XOPAVX2-LABEL: splatvar_shift_v32i8: 671; XOPAVX2: # %bb.0: 672; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 673; XOPAVX2-NEXT: vpbroadcastb %xmm1, %xmm1 674; XOPAVX2-NEXT: vpshlb %xmm1, %xmm2, %xmm2 675; XOPAVX2-NEXT: vpshlb %xmm1, %xmm0, %xmm0 676; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 677; XOPAVX2-NEXT: retq 678; 679; AVX512DQ-LABEL: splatvar_shift_v32i8: 680; AVX512DQ: # %bb.0: 681; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero 682; AVX512DQ-NEXT: vpsllw %xmm1, %ymm0, %ymm0 683; AVX512DQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 684; AVX512DQ-NEXT: vpsllw %xmm1, %xmm2, %xmm1 685; AVX512DQ-NEXT: vpbroadcastb %xmm1, %ymm1 686; AVX512DQ-NEXT: vpand %ymm1, %ymm0, %ymm0 687; AVX512DQ-NEXT: retq 688; 689; AVX512BW-LABEL: splatvar_shift_v32i8: 690; AVX512BW: # %bb.0: 691; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 692; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero 693; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0 694; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 695; AVX512BW-NEXT: retq 696; 697; AVX512DQVL-LABEL: splatvar_shift_v32i8: 698; AVX512DQVL: # %bb.0: 699; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero 700; AVX512DQVL-NEXT: vpsllw %xmm1, %ymm0, %ymm0 701; AVX512DQVL-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 702; AVX512DQVL-NEXT: vpsllw %xmm1, %xmm2, %xmm1 703; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %ymm1 704; AVX512DQVL-NEXT: vpand %ymm1, %ymm0, %ymm0 705; AVX512DQVL-NEXT: retq 706; 707; AVX512BWVL-LABEL: splatvar_shift_v32i8: 708; AVX512BWVL: # %bb.0: 709; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 710; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero 711; AVX512BWVL-NEXT: vpsllw %xmm1, %zmm0, %zmm0 712; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0 713; AVX512BWVL-NEXT: retq 714; 715; X86-AVX1-LABEL: splatvar_shift_v32i8: 716; X86-AVX1: # %bb.0: 717; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 718; X86-AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero 719; X86-AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2 720; X86-AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 721; X86-AVX1-NEXT: vpsllw %xmm1, %xmm3, %xmm3 722; X86-AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 723; X86-AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm3 724; X86-AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 725; X86-AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0 726; X86-AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0 727; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 728; X86-AVX1-NEXT: retl 729; 730; X86-AVX2-LABEL: splatvar_shift_v32i8: 731; X86-AVX2: # %bb.0: 732; X86-AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero 733; X86-AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0 734; X86-AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 735; X86-AVX2-NEXT: vpsllw %xmm1, %xmm2, %xmm1 736; X86-AVX2-NEXT: vpbroadcastb %xmm1, %ymm1 737; X86-AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 738; X86-AVX2-NEXT: retl 739 %splat = shufflevector <32 x i8> %b, <32 x i8> poison, <32 x i32> zeroinitializer 740 %shift = shl <32 x i8> %a, %splat 741 ret <32 x i8> %shift 742} 743 744; 745; Uniform Variable Modulo Shifts 746; 747 748define <4 x i64> @splatvar_modulo_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind { 749; AVX1-LABEL: splatvar_modulo_shift_v4i64: 750; AVX1: # %bb.0: 751; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 752; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 753; AVX1-NEXT: vpsllq %xmm1, %xmm2, %xmm2 754; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0 755; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 756; AVX1-NEXT: retq 757; 758; AVX2-LABEL: splatvar_modulo_shift_v4i64: 759; AVX2: # %bb.0: 760; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 761; AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0 762; AVX2-NEXT: retq 763; 764; XOPAVX1-LABEL: splatvar_modulo_shift_v4i64: 765; XOPAVX1: # %bb.0: 766; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 767; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 768; XOPAVX1-NEXT: vpsllq %xmm1, %xmm2, %xmm2 769; XOPAVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0 770; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 771; XOPAVX1-NEXT: retq 772; 773; XOPAVX2-LABEL: splatvar_modulo_shift_v4i64: 774; XOPAVX2: # %bb.0: 775; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 776; XOPAVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0 777; XOPAVX2-NEXT: retq 778; 779; AVX512-LABEL: splatvar_modulo_shift_v4i64: 780; AVX512: # %bb.0: 781; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 782; AVX512-NEXT: vpsllq %xmm1, %ymm0, %ymm0 783; AVX512-NEXT: retq 784; 785; AVX512VL-LABEL: splatvar_modulo_shift_v4i64: 786; AVX512VL: # %bb.0: 787; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1 788; AVX512VL-NEXT: vpsllq %xmm1, %ymm0, %ymm0 789; AVX512VL-NEXT: retq 790; 791; X86-AVX1-LABEL: splatvar_modulo_shift_v4i64: 792; X86-AVX1: # %bb.0: 793; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 794; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1 795; X86-AVX1-NEXT: vpsllq %xmm1, %xmm2, %xmm2 796; X86-AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0 797; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 798; X86-AVX1-NEXT: retl 799; 800; X86-AVX2-LABEL: splatvar_modulo_shift_v4i64: 801; X86-AVX2: # %bb.0: 802; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1 803; X86-AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0 804; X86-AVX2-NEXT: retl 805 %mod = and <4 x i64> %b, <i64 63, i64 63, i64 63, i64 63> 806 %splat = shufflevector <4 x i64> %mod, <4 x i64> poison, <4 x i32> zeroinitializer 807 %shift = shl <4 x i64> %a, %splat 808 ret <4 x i64> %shift 809} 810 811define <8 x i32> @splatvar_modulo_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind { 812; AVX1-LABEL: splatvar_modulo_shift_v8i32: 813; AVX1: # %bb.0: 814; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 815; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 816; AVX1-NEXT: vpslld %xmm1, %xmm2, %xmm2 817; AVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0 818; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 819; AVX1-NEXT: retq 820; 821; AVX2-LABEL: splatvar_modulo_shift_v8i32: 822; AVX2: # %bb.0: 823; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 824; AVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0 825; AVX2-NEXT: retq 826; 827; XOPAVX1-LABEL: splatvar_modulo_shift_v8i32: 828; XOPAVX1: # %bb.0: 829; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 830; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 831; XOPAVX1-NEXT: vpslld %xmm1, %xmm2, %xmm2 832; XOPAVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0 833; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 834; XOPAVX1-NEXT: retq 835; 836; XOPAVX2-LABEL: splatvar_modulo_shift_v8i32: 837; XOPAVX2: # %bb.0: 838; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 839; XOPAVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0 840; XOPAVX2-NEXT: retq 841; 842; AVX512-LABEL: splatvar_modulo_shift_v8i32: 843; AVX512: # %bb.0: 844; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 845; AVX512-NEXT: vpslld %xmm1, %ymm0, %ymm0 846; AVX512-NEXT: retq 847; 848; AVX512VL-LABEL: splatvar_modulo_shift_v8i32: 849; AVX512VL: # %bb.0: 850; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 851; AVX512VL-NEXT: vpslld %xmm1, %ymm0, %ymm0 852; AVX512VL-NEXT: retq 853; 854; X86-AVX1-LABEL: splatvar_modulo_shift_v8i32: 855; X86-AVX1: # %bb.0: 856; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 857; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1 858; X86-AVX1-NEXT: vpslld %xmm1, %xmm2, %xmm2 859; X86-AVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0 860; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 861; X86-AVX1-NEXT: retl 862; 863; X86-AVX2-LABEL: splatvar_modulo_shift_v8i32: 864; X86-AVX2: # %bb.0: 865; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1 866; X86-AVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0 867; X86-AVX2-NEXT: retl 868 %mod = and <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> 869 %splat = shufflevector <8 x i32> %mod, <8 x i32> poison, <8 x i32> zeroinitializer 870 %shift = shl <8 x i32> %a, %splat 871 ret <8 x i32> %shift 872} 873 874define <16 x i16> @splatvar_modulo_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind { 875; AVX1-LABEL: splatvar_modulo_shift_v16i16: 876; AVX1: # %bb.0: 877; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 878; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 879; AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2 880; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0 881; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 882; AVX1-NEXT: retq 883; 884; AVX2-LABEL: splatvar_modulo_shift_v16i16: 885; AVX2: # %bb.0: 886; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 887; AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0 888; AVX2-NEXT: retq 889; 890; XOPAVX1-LABEL: splatvar_modulo_shift_v16i16: 891; XOPAVX1: # %bb.0: 892; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 893; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 894; XOPAVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2 895; XOPAVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0 896; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 897; XOPAVX1-NEXT: retq 898; 899; XOPAVX2-LABEL: splatvar_modulo_shift_v16i16: 900; XOPAVX2: # %bb.0: 901; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 902; XOPAVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0 903; XOPAVX2-NEXT: retq 904; 905; AVX512-LABEL: splatvar_modulo_shift_v16i16: 906; AVX512: # %bb.0: 907; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 908; AVX512-NEXT: vpsllw %xmm1, %ymm0, %ymm0 909; AVX512-NEXT: retq 910; 911; AVX512VL-LABEL: splatvar_modulo_shift_v16i16: 912; AVX512VL: # %bb.0: 913; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 914; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0 915; AVX512VL-NEXT: retq 916; 917; X86-AVX1-LABEL: splatvar_modulo_shift_v16i16: 918; X86-AVX1: # %bb.0: 919; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 920; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1 921; X86-AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2 922; X86-AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0 923; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 924; X86-AVX1-NEXT: retl 925; 926; X86-AVX2-LABEL: splatvar_modulo_shift_v16i16: 927; X86-AVX2: # %bb.0: 928; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1 929; X86-AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0 930; X86-AVX2-NEXT: retl 931 %mod = and <16 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> 932 %splat = shufflevector <16 x i16> %mod, <16 x i16> poison, <16 x i32> zeroinitializer 933 %shift = shl <16 x i16> %a, %splat 934 ret <16 x i16> %shift 935} 936 937define <32 x i8> @splatvar_modulo_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind { 938; AVX1-LABEL: splatvar_modulo_shift_v32i8: 939; AVX1: # %bb.0: 940; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 941; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 942; AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2 943; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 944; AVX1-NEXT: vpsllw %xmm1, %xmm3, %xmm3 945; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 946; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm3 947; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 948; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0 949; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0 950; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 951; AVX1-NEXT: retq 952; 953; AVX2-LABEL: splatvar_modulo_shift_v32i8: 954; AVX2: # %bb.0: 955; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 956; AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0 957; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 958; AVX2-NEXT: vpsllw %xmm1, %xmm2, %xmm1 959; AVX2-NEXT: vpbroadcastb %xmm1, %ymm1 960; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 961; AVX2-NEXT: retq 962; 963; XOPAVX1-LABEL: splatvar_modulo_shift_v32i8: 964; XOPAVX1: # %bb.0: 965; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 966; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 967; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 968; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 969; XOPAVX1-NEXT: vpshlb %xmm1, %xmm2, %xmm2 970; XOPAVX1-NEXT: vpshlb %xmm1, %xmm0, %xmm0 971; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 972; XOPAVX1-NEXT: retq 973; 974; XOPAVX2-LABEL: splatvar_modulo_shift_v32i8: 975; XOPAVX2: # %bb.0: 976; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 977; XOPAVX2-NEXT: vpbroadcastb %xmm1, %xmm1 978; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 979; XOPAVX2-NEXT: vpshlb %xmm1, %xmm2, %xmm2 980; XOPAVX2-NEXT: vpshlb %xmm1, %xmm0, %xmm0 981; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 982; XOPAVX2-NEXT: retq 983; 984; AVX512DQ-LABEL: splatvar_modulo_shift_v32i8: 985; AVX512DQ: # %bb.0: 986; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 987; AVX512DQ-NEXT: vpsllw %xmm1, %ymm0, %ymm0 988; AVX512DQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 989; AVX512DQ-NEXT: vpsllw %xmm1, %xmm2, %xmm1 990; AVX512DQ-NEXT: vpbroadcastb %xmm1, %ymm1 991; AVX512DQ-NEXT: vpand %ymm1, %ymm0, %ymm0 992; AVX512DQ-NEXT: retq 993; 994; AVX512BW-LABEL: splatvar_modulo_shift_v32i8: 995; AVX512BW: # %bb.0: 996; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 997; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 998; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero 999; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0 1000; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 1001; AVX512BW-NEXT: retq 1002; 1003; AVX512DQVL-LABEL: splatvar_modulo_shift_v32i8: 1004; AVX512DQVL: # %bb.0: 1005; AVX512DQVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 1006; AVX512DQVL-NEXT: vpsllw %xmm1, %ymm0, %ymm0 1007; AVX512DQVL-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 1008; AVX512DQVL-NEXT: vpsllw %xmm1, %xmm2, %xmm1 1009; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %ymm1 1010; AVX512DQVL-NEXT: vpand %ymm1, %ymm0, %ymm0 1011; AVX512DQVL-NEXT: retq 1012; 1013; AVX512BWVL-LABEL: splatvar_modulo_shift_v32i8: 1014; AVX512BWVL: # %bb.0: 1015; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 1016; AVX512BWVL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1 1017; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero 1018; AVX512BWVL-NEXT: vpsllw %xmm1, %zmm0, %zmm0 1019; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0 1020; AVX512BWVL-NEXT: retq 1021; 1022; X86-AVX1-LABEL: splatvar_modulo_shift_v32i8: 1023; X86-AVX1: # %bb.0: 1024; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1025; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1 1026; X86-AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2 1027; X86-AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 1028; X86-AVX1-NEXT: vpsllw %xmm1, %xmm3, %xmm3 1029; X86-AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 1030; X86-AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm3 1031; X86-AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 1032; X86-AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0 1033; X86-AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0 1034; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1035; X86-AVX1-NEXT: retl 1036; 1037; X86-AVX2-LABEL: splatvar_modulo_shift_v32i8: 1038; X86-AVX2: # %bb.0: 1039; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1 1040; X86-AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0 1041; X86-AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 1042; X86-AVX2-NEXT: vpsllw %xmm1, %xmm2, %xmm1 1043; X86-AVX2-NEXT: vpbroadcastb %xmm1, %ymm1 1044; X86-AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 1045; X86-AVX2-NEXT: retl 1046 %mod = and <32 x i8> %b, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7> 1047 %splat = shufflevector <32 x i8> %mod, <32 x i8> poison, <32 x i32> zeroinitializer 1048 %shift = shl <32 x i8> %a, %splat 1049 ret <32 x i8> %shift 1050} 1051 1052; 1053; Constant Shifts 1054; 1055 1056define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) nounwind { 1057; AVX1-LABEL: constant_shift_v4i64: 1058; AVX1: # %bb.0: 1059; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1060; AVX1-NEXT: vpsllq $62, %xmm1, %xmm2 1061; AVX1-NEXT: vpsllq $31, %xmm1, %xmm1 1062; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7] 1063; AVX1-NEXT: vpsllq $7, %xmm0, %xmm2 1064; AVX1-NEXT: vpaddq %xmm0, %xmm0, %xmm0 1065; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7] 1066; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1067; AVX1-NEXT: retq 1068; 1069; AVX2-LABEL: constant_shift_v4i64: 1070; AVX2: # %bb.0: 1071; AVX2-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1072; AVX2-NEXT: retq 1073; 1074; XOPAVX1-LABEL: constant_shift_v4i64: 1075; XOPAVX1: # %bb.0: 1076; XOPAVX1-NEXT: vpshlq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1077; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1078; XOPAVX1-NEXT: vpshlq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1079; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1080; XOPAVX1-NEXT: retq 1081; 1082; XOPAVX2-LABEL: constant_shift_v4i64: 1083; XOPAVX2: # %bb.0: 1084; XOPAVX2-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1085; XOPAVX2-NEXT: retq 1086; 1087; AVX512-LABEL: constant_shift_v4i64: 1088; AVX512: # %bb.0: 1089; AVX512-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1090; AVX512-NEXT: retq 1091; 1092; AVX512VL-LABEL: constant_shift_v4i64: 1093; AVX512VL: # %bb.0: 1094; AVX512VL-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1095; AVX512VL-NEXT: retq 1096; 1097; X86-AVX1-LABEL: constant_shift_v4i64: 1098; X86-AVX1: # %bb.0: 1099; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1100; X86-AVX1-NEXT: vpsllq $62, %xmm1, %xmm2 1101; X86-AVX1-NEXT: vpsllq $31, %xmm1, %xmm1 1102; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7] 1103; X86-AVX1-NEXT: vpsllq $7, %xmm0, %xmm2 1104; X86-AVX1-NEXT: vpaddq %xmm0, %xmm0, %xmm0 1105; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7] 1106; X86-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1107; X86-AVX1-NEXT: retl 1108; 1109; X86-AVX2-LABEL: constant_shift_v4i64: 1110; X86-AVX2: # %bb.0: 1111; X86-AVX2-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 1112; X86-AVX2-NEXT: retl 1113 %shift = shl <4 x i64> %a, <i64 1, i64 7, i64 31, i64 62> 1114 ret <4 x i64> %shift 1115} 1116 1117define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) nounwind { 1118; AVX1-LABEL: constant_shift_v8i32: 1119; AVX1: # %bb.0: 1120; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1121; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1122; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1123; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1124; AVX1-NEXT: retq 1125; 1126; AVX2-LABEL: constant_shift_v8i32: 1127; AVX2: # %bb.0: 1128; AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1129; AVX2-NEXT: retq 1130; 1131; XOPAVX1-LABEL: constant_shift_v8i32: 1132; XOPAVX1: # %bb.0: 1133; XOPAVX1-NEXT: vpshld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1134; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1135; XOPAVX1-NEXT: vpshld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1136; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1137; XOPAVX1-NEXT: retq 1138; 1139; XOPAVX2-LABEL: constant_shift_v8i32: 1140; XOPAVX2: # %bb.0: 1141; XOPAVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1142; XOPAVX2-NEXT: retq 1143; 1144; AVX512-LABEL: constant_shift_v8i32: 1145; AVX512: # %bb.0: 1146; AVX512-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1147; AVX512-NEXT: retq 1148; 1149; AVX512VL-LABEL: constant_shift_v8i32: 1150; AVX512VL: # %bb.0: 1151; AVX512VL-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1152; AVX512VL-NEXT: retq 1153; 1154; X86-AVX1-LABEL: constant_shift_v8i32: 1155; X86-AVX1: # %bb.0: 1156; X86-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1 1157; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1158; X86-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 1159; X86-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1160; X86-AVX1-NEXT: retl 1161; 1162; X86-AVX2-LABEL: constant_shift_v8i32: 1163; X86-AVX2: # %bb.0: 1164; X86-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 1165; X86-AVX2-NEXT: retl 1166 %shift = shl <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7> 1167 ret <8 x i32> %shift 1168} 1169 1170define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind { 1171; AVX1-LABEL: constant_shift_v16i16: 1172; AVX1: # %bb.0: 1173; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [1,2,4,8,16,32,64,128] 1174; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1175; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [256,512,1024,2048,4096,8192,16384,32768] 1176; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1177; AVX1-NEXT: retq 1178; 1179; AVX2-LABEL: constant_shift_v16i16: 1180; AVX2: # %bb.0: 1181; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768] 1182; AVX2-NEXT: retq 1183; 1184; XOPAVX1-LABEL: constant_shift_v16i16: 1185; XOPAVX1: # %bb.0: 1186; XOPAVX1-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1187; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1188; XOPAVX1-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1189; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1190; XOPAVX1-NEXT: retq 1191; 1192; XOPAVX2-LABEL: constant_shift_v16i16: 1193; XOPAVX2: # %bb.0: 1194; XOPAVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768] 1195; XOPAVX2-NEXT: retq 1196; 1197; AVX512DQ-LABEL: constant_shift_v16i16: 1198; AVX512DQ: # %bb.0: 1199; AVX512DQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768] 1200; AVX512DQ-NEXT: retq 1201; 1202; AVX512BW-LABEL: constant_shift_v16i16: 1203; AVX512BW: # %bb.0: 1204; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1205; AVX512BW-NEXT: vpmovsxbw {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] 1206; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 1207; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1208; AVX512BW-NEXT: retq 1209; 1210; AVX512DQVL-LABEL: constant_shift_v16i16: 1211; AVX512DQVL: # %bb.0: 1212; AVX512DQVL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768] 1213; AVX512DQVL-NEXT: retq 1214; 1215; AVX512BWVL-LABEL: constant_shift_v16i16: 1216; AVX512BWVL: # %bb.0: 1217; AVX512BWVL-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1218; AVX512BWVL-NEXT: retq 1219; 1220; X86-AVX1-LABEL: constant_shift_v16i16: 1221; X86-AVX1: # %bb.0: 1222; X86-AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1 # [1,2,4,8,16,32,64,128] 1223; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1224; X86-AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # [256,512,1024,2048,4096,8192,16384,32768] 1225; X86-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1226; X86-AVX1-NEXT: retl 1227; 1228; X86-AVX2-LABEL: constant_shift_v16i16: 1229; X86-AVX2: # %bb.0: 1230; X86-AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 # [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768] 1231; X86-AVX2-NEXT: retl 1232 %shift = shl <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15> 1233 ret <16 x i16> %shift 1234} 1235 1236define <16 x i16> @constant_shift_v16i16_pairs(<16 x i16> %a) nounwind { 1237; AVX1-LABEL: constant_shift_v16i16_pairs: 1238; AVX1: # %bb.0: 1239; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [4,4,8,8,1,1,2,2] 1240; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1241; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [64,64,128,128,16,16,32,32] 1242; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1243; AVX1-NEXT: retq 1244; 1245; AVX2-LABEL: constant_shift_v16i16_pairs: 1246; AVX2: # %bb.0: 1247; AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1248; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1249; AVX2-NEXT: retq 1250; 1251; XOPAVX1-LABEL: constant_shift_v16i16_pairs: 1252; XOPAVX1: # %bb.0: 1253; XOPAVX1-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1254; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1255; XOPAVX1-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1256; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1257; XOPAVX1-NEXT: retq 1258; 1259; XOPAVX2-LABEL: constant_shift_v16i16_pairs: 1260; XOPAVX2: # %bb.0: 1261; XOPAVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [4,4,8,8,1,1,2,2,64,64,128,128,16,16,32,32] 1262; XOPAVX2-NEXT: retq 1263; 1264; AVX512DQ-LABEL: constant_shift_v16i16_pairs: 1265; AVX512DQ: # %bb.0: 1266; AVX512DQ-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1267; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1268; AVX512DQ-NEXT: retq 1269; 1270; AVX512BW-LABEL: constant_shift_v16i16_pairs: 1271; AVX512BW: # %bb.0: 1272; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1273; AVX512BW-NEXT: vpmovsxbw {{.*#+}} ymm1 = [2,2,3,3,0,0,1,1,6,6,7,7,4,4,5,5] 1274; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 1275; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1276; AVX512BW-NEXT: retq 1277; 1278; AVX512DQVL-LABEL: constant_shift_v16i16_pairs: 1279; AVX512DQVL: # %bb.0: 1280; AVX512DQVL-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1281; AVX512DQVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1282; AVX512DQVL-NEXT: retq 1283; 1284; AVX512BWVL-LABEL: constant_shift_v16i16_pairs: 1285; AVX512BWVL: # %bb.0: 1286; AVX512BWVL-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1287; AVX512BWVL-NEXT: retq 1288; 1289; X86-AVX1-LABEL: constant_shift_v16i16_pairs: 1290; X86-AVX1: # %bb.0: 1291; X86-AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1 # [4,4,8,8,1,1,2,2] 1292; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1293; X86-AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # [64,64,128,128,16,16,32,32] 1294; X86-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1295; X86-AVX1-NEXT: retl 1296; 1297; X86-AVX2-LABEL: constant_shift_v16i16_pairs: 1298; X86-AVX2: # %bb.0: 1299; X86-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 1300; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 1301; X86-AVX2-NEXT: retl 1302 %shift = shl <16 x i16> %a, <i16 2, i16 2, i16 3, i16 3, i16 0, i16 0, i16 1, i16 1, i16 6, i16 6, i16 7, i16 7, i16 4, i16 4, i16 5, i16 5> 1303 ret <16 x i16> %shift 1304} 1305 1306define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind { 1307; AVX1-LABEL: constant_shift_v32i8: 1308; AVX1: # %bb.0: 1309; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1310; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = [1,4,16,64,128,32,8,2] 1311; AVX1-NEXT: vpmaddubsw %xmm2, %xmm1, %xmm3 1312; AVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255] 1313; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3 1314; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [0,2,0,8,0,32,0,128,0,64,0,16,0,4,0,1] 1315; AVX1-NEXT: vpmaddubsw %xmm5, %xmm1, %xmm1 1316; AVX1-NEXT: vpsllw $8, %xmm1, %xmm1 1317; AVX1-NEXT: vpor %xmm1, %xmm3, %xmm1 1318; AVX1-NEXT: vpmaddubsw %xmm2, %xmm0, %xmm2 1319; AVX1-NEXT: vpand %xmm4, %xmm2, %xmm2 1320; AVX1-NEXT: vpmaddubsw %xmm5, %xmm0, %xmm0 1321; AVX1-NEXT: vpsllw $8, %xmm0, %xmm0 1322; AVX1-NEXT: vpor %xmm0, %xmm2, %xmm0 1323; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1324; AVX1-NEXT: retq 1325; 1326; AVX2-LABEL: constant_shift_v32i8: 1327; AVX2: # %bb.0: 1328; AVX2-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1 # [0,2,0,8,0,32,0,128,0,64,0,16,0,4,0,1,0,2,0,8,0,32,0,128,0,64,0,16,0,4,0,1] 1329; AVX2-NEXT: vpsllw $8, %ymm1, %ymm1 1330; AVX2-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1,0,4,0,16,0,64,0,128,0,32,0,8,0,2,0,1,0,4,0,16,0,64,0,128,0,32,0,8,0,2,0] 1331; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1332; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 1333; AVX2-NEXT: retq 1334; 1335; XOPAVX1-LABEL: constant_shift_v32i8: 1336; XOPAVX1: # %bb.0: 1337; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1338; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0] 1339; XOPAVX1-NEXT: vpshlb %xmm2, %xmm1, %xmm1 1340; XOPAVX1-NEXT: vpshlb %xmm2, %xmm0, %xmm0 1341; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1342; XOPAVX1-NEXT: retq 1343; 1344; XOPAVX2-LABEL: constant_shift_v32i8: 1345; XOPAVX2: # %bb.0: 1346; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1347; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0] 1348; XOPAVX2-NEXT: vpshlb %xmm2, %xmm1, %xmm1 1349; XOPAVX2-NEXT: vpshlb %xmm2, %xmm0, %xmm0 1350; XOPAVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 1351; XOPAVX2-NEXT: retq 1352; 1353; AVX512DQ-LABEL: constant_shift_v32i8: 1354; AVX512DQ: # %bb.0: 1355; AVX512DQ-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1 # [0,2,0,8,0,32,0,128,0,64,0,16,0,4,0,1,0,2,0,8,0,32,0,128,0,64,0,16,0,4,0,1] 1356; AVX512DQ-NEXT: vpsllw $8, %ymm1, %ymm1 1357; AVX512DQ-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1,0,4,0,16,0,64,0,128,0,32,0,8,0,2,0,1,0,4,0,16,0,64,0,128,0,32,0,8,0,2,0] 1358; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1359; AVX512DQ-NEXT: vpor %ymm1, %ymm0, %ymm0 1360; AVX512DQ-NEXT: retq 1361; 1362; AVX512BW-LABEL: constant_shift_v32i8: 1363; AVX512BW: # %bb.0: 1364; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 1365; AVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 1366; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 1367; AVX512BW-NEXT: retq 1368; 1369; AVX512DQVL-LABEL: constant_shift_v32i8: 1370; AVX512DQVL: # %bb.0: 1371; AVX512DQVL-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1 # [1,0,4,0,16,0,64,0,128,0,32,0,8,0,2,0,1,0,4,0,16,0,64,0,128,0,32,0,8,0,2,0] 1372; AVX512DQVL-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [0,2,0,8,0,32,0,128,0,64,0,16,0,4,0,1,0,2,0,8,0,32,0,128,0,64,0,16,0,4,0,1] 1373; AVX512DQVL-NEXT: vpsllw $8, %ymm0, %ymm0 1374; AVX512DQVL-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 | (ymm1 & mem) 1375; AVX512DQVL-NEXT: retq 1376; 1377; AVX512BWVL-LABEL: constant_shift_v32i8: 1378; AVX512BWVL: # %bb.0: 1379; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 1380; AVX512BWVL-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 1381; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0 1382; AVX512BWVL-NEXT: retq 1383; 1384; X86-AVX1-LABEL: constant_shift_v32i8: 1385; X86-AVX1: # %bb.0: 1386; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1387; X86-AVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = [1,4,16,64,128,32,8,2] 1388; X86-AVX1-NEXT: vpmaddubsw %xmm2, %xmm1, %xmm3 1389; X86-AVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255] 1390; X86-AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3 1391; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [0,2,0,8,0,32,0,128,0,64,0,16,0,4,0,1] 1392; X86-AVX1-NEXT: vpmaddubsw %xmm5, %xmm1, %xmm1 1393; X86-AVX1-NEXT: vpsllw $8, %xmm1, %xmm1 1394; X86-AVX1-NEXT: vpor %xmm1, %xmm3, %xmm1 1395; X86-AVX1-NEXT: vpmaddubsw %xmm2, %xmm0, %xmm2 1396; X86-AVX1-NEXT: vpand %xmm4, %xmm2, %xmm2 1397; X86-AVX1-NEXT: vpmaddubsw %xmm5, %xmm0, %xmm0 1398; X86-AVX1-NEXT: vpsllw $8, %xmm0, %xmm0 1399; X86-AVX1-NEXT: vpor %xmm0, %xmm2, %xmm0 1400; X86-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1401; X86-AVX1-NEXT: retl 1402; 1403; X86-AVX2-LABEL: constant_shift_v32i8: 1404; X86-AVX2: # %bb.0: 1405; X86-AVX2-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm1 # [0,2,0,8,0,32,0,128,0,64,0,16,0,4,0,1,0,2,0,8,0,32,0,128,0,64,0,16,0,4,0,1] 1406; X86-AVX2-NEXT: vpsllw $8, %ymm1, %ymm1 1407; X86-AVX2-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 # [1,0,4,0,16,0,64,0,128,0,32,0,8,0,2,0,1,0,4,0,16,0,64,0,128,0,32,0,8,0,2,0] 1408; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 1409; X86-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 1410; X86-AVX2-NEXT: retl 1411 %shift = shl <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0> 1412 ret <32 x i8> %shift 1413} 1414 1415define <32 x i8> @constant_shift_v32i8_pairs(<32 x i8> %a) nounwind { 1416; AVX1-LABEL: constant_shift_v32i8_pairs: 1417; AVX1: # %bb.0: 1418; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [8,128,64,4,128,1,128,2] 1419; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 1420; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1421; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [32,1,16,128,64,2,16,1] 1422; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1423; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1424; AVX1-NEXT: retq 1425; 1426; AVX2-LABEL: constant_shift_v32i8_pairs: 1427; AVX2: # %bb.0: 1428; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [8,128,64,4,128,1,128,2,32,1,16,128,64,2,16,1] 1429; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1430; AVX2-NEXT: retq 1431; 1432; XOPAVX1-LABEL: constant_shift_v32i8_pairs: 1433; XOPAVX1: # %bb.0: 1434; XOPAVX1-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1435; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1436; XOPAVX1-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1437; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1438; XOPAVX1-NEXT: retq 1439; 1440; XOPAVX2-LABEL: constant_shift_v32i8_pairs: 1441; XOPAVX2: # %bb.0: 1442; XOPAVX2-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1443; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 1444; XOPAVX2-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1445; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 1446; XOPAVX2-NEXT: retq 1447; 1448; AVX512DQ-LABEL: constant_shift_v32i8_pairs: 1449; AVX512DQ: # %bb.0: 1450; AVX512DQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [8,128,64,4,128,1,128,2,32,1,16,128,64,2,16,1] 1451; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1452; AVX512DQ-NEXT: retq 1453; 1454; AVX512BW-LABEL: constant_shift_v32i8_pairs: 1455; AVX512BW: # %bb.0: 1456; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1457; AVX512BW-NEXT: vpmovsxbw {{.*#+}} ymm1 = [3,7,6,2,7,0,7,1,5,0,4,7,6,1,4,0] 1458; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 1459; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1460; AVX512BW-NEXT: retq 1461; 1462; AVX512DQVL-LABEL: constant_shift_v32i8_pairs: 1463; AVX512DQVL: # %bb.0: 1464; AVX512DQVL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [8,128,64,4,128,1,128,2,32,1,16,128,64,2,16,1] 1465; AVX512DQVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1466; AVX512DQVL-NEXT: retq 1467; 1468; AVX512BWVL-LABEL: constant_shift_v32i8_pairs: 1469; AVX512BWVL: # %bb.0: 1470; AVX512BWVL-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1471; AVX512BWVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1472; AVX512BWVL-NEXT: retq 1473; 1474; X86-AVX1-LABEL: constant_shift_v32i8_pairs: 1475; X86-AVX1: # %bb.0: 1476; X86-AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1 # [8,128,64,4,128,1,128,2] 1477; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1 1478; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1479; X86-AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # [32,1,16,128,64,2,16,1] 1480; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 1481; X86-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1482; X86-AVX1-NEXT: retl 1483; 1484; X86-AVX2-LABEL: constant_shift_v32i8_pairs: 1485; X86-AVX2: # %bb.0: 1486; X86-AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 # [8,128,64,4,128,1,128,2,32,1,16,128,64,2,16,1] 1487; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 1488; X86-AVX2-NEXT: retl 1489 %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 7, i8 7, i8 6, i8 6, i8 2, i8 2, i8 7, i8 7, i8 0, i8 0, i8 7, i8 7, i8 1, i8 1, i8 5, i8 5, i8 0, i8 0, i8 4, i8 4, i8 7, i8 7, i8 6, i8 6, i8 1, i8 1, i8 4, i8 4, i8 0, i8 0> 1490 ret <32 x i8> %shift 1491} 1492 1493define <32 x i8> @constant_shift_v32i8_quads(<32 x i8> %a) nounwind { 1494; AVX1-LABEL: constant_shift_v32i8_quads: 1495; AVX1: # %bb.0: 1496; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [4,4,8,8,1,1,2,2] 1497; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 1498; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1499; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [64,64,128,128,16,16,32,32] 1500; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1501; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1502; AVX1-NEXT: retq 1503; 1504; AVX2-LABEL: constant_shift_v32i8_quads: 1505; AVX2: # %bb.0: 1506; AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1507; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1508; AVX2-NEXT: retq 1509; 1510; XOPAVX1-LABEL: constant_shift_v32i8_quads: 1511; XOPAVX1: # %bb.0: 1512; XOPAVX1-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1513; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1514; XOPAVX1-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1515; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1516; XOPAVX1-NEXT: retq 1517; 1518; XOPAVX2-LABEL: constant_shift_v32i8_quads: 1519; XOPAVX2: # %bb.0: 1520; XOPAVX2-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1521; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 1522; XOPAVX2-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1523; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 1524; XOPAVX2-NEXT: retq 1525; 1526; AVX512-LABEL: constant_shift_v32i8_quads: 1527; AVX512: # %bb.0: 1528; AVX512-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1529; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1530; AVX512-NEXT: retq 1531; 1532; AVX512VL-LABEL: constant_shift_v32i8_quads: 1533; AVX512VL: # %bb.0: 1534; AVX512VL-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1535; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1536; AVX512VL-NEXT: retq 1537; 1538; X86-AVX1-LABEL: constant_shift_v32i8_quads: 1539; X86-AVX1: # %bb.0: 1540; X86-AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1 # [4,4,8,8,1,1,2,2] 1541; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1 1542; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1543; X86-AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # [64,64,128,128,16,16,32,32] 1544; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 1545; X86-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1546; X86-AVX1-NEXT: retl 1547; 1548; X86-AVX2-LABEL: constant_shift_v32i8_quads: 1549; X86-AVX2: # %bb.0: 1550; X86-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 1551; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 1552; X86-AVX2-NEXT: retl 1553 %shift = shl <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 3, i8 3, i8 3, i8 3, i8 0, i8 0, i8 0, i8 0, i8 1, i8 1, i8 1, i8 1, i8 6, i8 6, i8 6, i8 6, i8 7, i8 7, i8 7, i8 7, i8 4, i8 4, i8 4, i8 4, i8 5, i8 5, i8 5, i8 5> 1554 ret <32 x i8> %shift 1555} 1556 1557; 1558; Uniform Constant Shifts 1559; 1560 1561define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) nounwind { 1562; AVX1-LABEL: splatconstant_shift_v4i64: 1563; AVX1: # %bb.0: 1564; AVX1-NEXT: vpsllq $7, %xmm0, %xmm1 1565; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1566; AVX1-NEXT: vpsllq $7, %xmm0, %xmm0 1567; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1568; AVX1-NEXT: retq 1569; 1570; AVX2-LABEL: splatconstant_shift_v4i64: 1571; AVX2: # %bb.0: 1572; AVX2-NEXT: vpsllq $7, %ymm0, %ymm0 1573; AVX2-NEXT: retq 1574; 1575; XOPAVX1-LABEL: splatconstant_shift_v4i64: 1576; XOPAVX1: # %bb.0: 1577; XOPAVX1-NEXT: vpsllq $7, %xmm0, %xmm1 1578; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1579; XOPAVX1-NEXT: vpsllq $7, %xmm0, %xmm0 1580; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1581; XOPAVX1-NEXT: retq 1582; 1583; XOPAVX2-LABEL: splatconstant_shift_v4i64: 1584; XOPAVX2: # %bb.0: 1585; XOPAVX2-NEXT: vpsllq $7, %ymm0, %ymm0 1586; XOPAVX2-NEXT: retq 1587; 1588; AVX512-LABEL: splatconstant_shift_v4i64: 1589; AVX512: # %bb.0: 1590; AVX512-NEXT: vpsllq $7, %ymm0, %ymm0 1591; AVX512-NEXT: retq 1592; 1593; AVX512VL-LABEL: splatconstant_shift_v4i64: 1594; AVX512VL: # %bb.0: 1595; AVX512VL-NEXT: vpsllq $7, %ymm0, %ymm0 1596; AVX512VL-NEXT: retq 1597; 1598; X86-AVX1-LABEL: splatconstant_shift_v4i64: 1599; X86-AVX1: # %bb.0: 1600; X86-AVX1-NEXT: vpsllq $7, %xmm0, %xmm1 1601; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1602; X86-AVX1-NEXT: vpsllq $7, %xmm0, %xmm0 1603; X86-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1604; X86-AVX1-NEXT: retl 1605; 1606; X86-AVX2-LABEL: splatconstant_shift_v4i64: 1607; X86-AVX2: # %bb.0: 1608; X86-AVX2-NEXT: vpsllq $7, %ymm0, %ymm0 1609; X86-AVX2-NEXT: retl 1610 %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7> 1611 ret <4 x i64> %shift 1612} 1613 1614define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) nounwind { 1615; AVX1-LABEL: splatconstant_shift_v8i32: 1616; AVX1: # %bb.0: 1617; AVX1-NEXT: vpslld $5, %xmm0, %xmm1 1618; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1619; AVX1-NEXT: vpslld $5, %xmm0, %xmm0 1620; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1621; AVX1-NEXT: retq 1622; 1623; AVX2-LABEL: splatconstant_shift_v8i32: 1624; AVX2: # %bb.0: 1625; AVX2-NEXT: vpslld $5, %ymm0, %ymm0 1626; AVX2-NEXT: retq 1627; 1628; XOPAVX1-LABEL: splatconstant_shift_v8i32: 1629; XOPAVX1: # %bb.0: 1630; XOPAVX1-NEXT: vpslld $5, %xmm0, %xmm1 1631; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1632; XOPAVX1-NEXT: vpslld $5, %xmm0, %xmm0 1633; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1634; XOPAVX1-NEXT: retq 1635; 1636; XOPAVX2-LABEL: splatconstant_shift_v8i32: 1637; XOPAVX2: # %bb.0: 1638; XOPAVX2-NEXT: vpslld $5, %ymm0, %ymm0 1639; XOPAVX2-NEXT: retq 1640; 1641; AVX512-LABEL: splatconstant_shift_v8i32: 1642; AVX512: # %bb.0: 1643; AVX512-NEXT: vpslld $5, %ymm0, %ymm0 1644; AVX512-NEXT: retq 1645; 1646; AVX512VL-LABEL: splatconstant_shift_v8i32: 1647; AVX512VL: # %bb.0: 1648; AVX512VL-NEXT: vpslld $5, %ymm0, %ymm0 1649; AVX512VL-NEXT: retq 1650; 1651; X86-AVX1-LABEL: splatconstant_shift_v8i32: 1652; X86-AVX1: # %bb.0: 1653; X86-AVX1-NEXT: vpslld $5, %xmm0, %xmm1 1654; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1655; X86-AVX1-NEXT: vpslld $5, %xmm0, %xmm0 1656; X86-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1657; X86-AVX1-NEXT: retl 1658; 1659; X86-AVX2-LABEL: splatconstant_shift_v8i32: 1660; X86-AVX2: # %bb.0: 1661; X86-AVX2-NEXT: vpslld $5, %ymm0, %ymm0 1662; X86-AVX2-NEXT: retl 1663 %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5> 1664 ret <8 x i32> %shift 1665} 1666 1667define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) nounwind { 1668; AVX1-LABEL: splatconstant_shift_v16i16: 1669; AVX1: # %bb.0: 1670; AVX1-NEXT: vpsllw $3, %xmm0, %xmm1 1671; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1672; AVX1-NEXT: vpsllw $3, %xmm0, %xmm0 1673; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1674; AVX1-NEXT: retq 1675; 1676; AVX2-LABEL: splatconstant_shift_v16i16: 1677; AVX2: # %bb.0: 1678; AVX2-NEXT: vpsllw $3, %ymm0, %ymm0 1679; AVX2-NEXT: retq 1680; 1681; XOPAVX1-LABEL: splatconstant_shift_v16i16: 1682; XOPAVX1: # %bb.0: 1683; XOPAVX1-NEXT: vpsllw $3, %xmm0, %xmm1 1684; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1685; XOPAVX1-NEXT: vpsllw $3, %xmm0, %xmm0 1686; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1687; XOPAVX1-NEXT: retq 1688; 1689; XOPAVX2-LABEL: splatconstant_shift_v16i16: 1690; XOPAVX2: # %bb.0: 1691; XOPAVX2-NEXT: vpsllw $3, %ymm0, %ymm0 1692; XOPAVX2-NEXT: retq 1693; 1694; AVX512-LABEL: splatconstant_shift_v16i16: 1695; AVX512: # %bb.0: 1696; AVX512-NEXT: vpsllw $3, %ymm0, %ymm0 1697; AVX512-NEXT: retq 1698; 1699; AVX512VL-LABEL: splatconstant_shift_v16i16: 1700; AVX512VL: # %bb.0: 1701; AVX512VL-NEXT: vpsllw $3, %ymm0, %ymm0 1702; AVX512VL-NEXT: retq 1703; 1704; X86-AVX1-LABEL: splatconstant_shift_v16i16: 1705; X86-AVX1: # %bb.0: 1706; X86-AVX1-NEXT: vpsllw $3, %xmm0, %xmm1 1707; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1708; X86-AVX1-NEXT: vpsllw $3, %xmm0, %xmm0 1709; X86-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1710; X86-AVX1-NEXT: retl 1711; 1712; X86-AVX2-LABEL: splatconstant_shift_v16i16: 1713; X86-AVX2: # %bb.0: 1714; X86-AVX2-NEXT: vpsllw $3, %ymm0, %ymm0 1715; X86-AVX2-NEXT: retl 1716 %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> 1717 ret <16 x i16> %shift 1718} 1719 1720define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) nounwind { 1721; AVX1-LABEL: splatconstant_shift_v32i8: 1722; AVX1: # %bb.0: 1723; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1724; AVX1-NEXT: vpsllw $3, %xmm1, %xmm1 1725; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248] 1726; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1 1727; AVX1-NEXT: vpsllw $3, %xmm0, %xmm0 1728; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 1729; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1730; AVX1-NEXT: retq 1731; 1732; AVX2-LABEL: splatconstant_shift_v32i8: 1733; AVX2: # %bb.0: 1734; AVX2-NEXT: vpsllw $3, %ymm0, %ymm0 1735; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1736; AVX2-NEXT: retq 1737; 1738; XOPAVX1-LABEL: splatconstant_shift_v32i8: 1739; XOPAVX1: # %bb.0: 1740; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1741; XOPAVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3] 1742; XOPAVX1-NEXT: vpshlb %xmm2, %xmm1, %xmm1 1743; XOPAVX1-NEXT: vpshlb %xmm2, %xmm0, %xmm0 1744; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1745; XOPAVX1-NEXT: retq 1746; 1747; XOPAVX2-LABEL: splatconstant_shift_v32i8: 1748; XOPAVX2: # %bb.0: 1749; XOPAVX2-NEXT: vpsllw $3, %ymm0, %ymm0 1750; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1751; XOPAVX2-NEXT: retq 1752; 1753; AVX512-LABEL: splatconstant_shift_v32i8: 1754; AVX512: # %bb.0: 1755; AVX512-NEXT: vpsllw $3, %ymm0, %ymm0 1756; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1757; AVX512-NEXT: retq 1758; 1759; AVX512VL-LABEL: splatconstant_shift_v32i8: 1760; AVX512VL: # %bb.0: 1761; AVX512VL-NEXT: vpsllw $3, %ymm0, %ymm0 1762; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0 1763; AVX512VL-NEXT: retq 1764; 1765; X86-AVX1-LABEL: splatconstant_shift_v32i8: 1766; X86-AVX1: # %bb.0: 1767; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1768; X86-AVX1-NEXT: vpsllw $3, %xmm1, %xmm1 1769; X86-AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248] 1770; X86-AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1 1771; X86-AVX1-NEXT: vpsllw $3, %xmm0, %xmm0 1772; X86-AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 1773; X86-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1774; X86-AVX1-NEXT: retl 1775; 1776; X86-AVX2-LABEL: splatconstant_shift_v32i8: 1777; X86-AVX2: # %bb.0: 1778; X86-AVX2-NEXT: vpsllw $3, %ymm0, %ymm0 1779; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 1780; X86-AVX2-NEXT: retl 1781 %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> 1782 ret <32 x i8> %shift 1783} 1784 1785; 1786; Special Cases 1787; 1788 1789define <4 x i64> @shift32_v4i64(<4 x i64> %a) nounwind { 1790; AVX1-LABEL: shift32_v4i64: 1791; AVX1: # %bb.0: 1792; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 1793; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,2],ymm0[0,2],ymm1[4,6],ymm0[4,6] 1794; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7] 1795; AVX1-NEXT: retq 1796; 1797; AVX2-LABEL: shift32_v4i64: 1798; AVX2: # %bb.0: 1799; AVX2-NEXT: vpsllq $32, %ymm0, %ymm0 1800; AVX2-NEXT: retq 1801; 1802; XOPAVX1-LABEL: shift32_v4i64: 1803; XOPAVX1: # %bb.0: 1804; XOPAVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 1805; XOPAVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,2],ymm0[0,2],ymm1[4,6],ymm0[4,6] 1806; XOPAVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7] 1807; XOPAVX1-NEXT: retq 1808; 1809; XOPAVX2-LABEL: shift32_v4i64: 1810; XOPAVX2: # %bb.0: 1811; XOPAVX2-NEXT: vpsllq $32, %ymm0, %ymm0 1812; XOPAVX2-NEXT: retq 1813; 1814; AVX512-LABEL: shift32_v4i64: 1815; AVX512: # %bb.0: 1816; AVX512-NEXT: vpsllq $32, %ymm0, %ymm0 1817; AVX512-NEXT: retq 1818; 1819; AVX512VL-LABEL: shift32_v4i64: 1820; AVX512VL: # %bb.0: 1821; AVX512VL-NEXT: vpsllq $32, %ymm0, %ymm0 1822; AVX512VL-NEXT: retq 1823; 1824; X86-AVX1-LABEL: shift32_v4i64: 1825; X86-AVX1: # %bb.0: 1826; X86-AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 1827; X86-AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,2],ymm0[0,2],ymm1[4,6],ymm0[4,6] 1828; X86-AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7] 1829; X86-AVX1-NEXT: retl 1830; 1831; X86-AVX2-LABEL: shift32_v4i64: 1832; X86-AVX2: # %bb.0: 1833; X86-AVX2-NEXT: vpsllq $32, %ymm0, %ymm0 1834; X86-AVX2-NEXT: retl 1835 %shift = shl <4 x i64> %a, <i64 32, i64 32, i64 32, i64 32> 1836 ret <4 x i64> %shift 1837} 1838