1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX1 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2 4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512F 5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX512VL 6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512BW 7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi,+avx512vbmi2 | FileCheck %s --check-prefixes=AVX512VBMI2 8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512VLBW 9; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefixes=AVX10,AVX512VLVBMI2 10; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.1-256 | FileCheck %s --check-prefixes=AVX10,AVX10_256 11; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.1-512 | FileCheck %s --check-prefixes=AVX10,AVX512VLVBMI2 12; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefixes=XOPAVX1 13; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=XOPAVX2 14 15declare <4 x i64> @llvm.fshr.v4i64(<4 x i64>, <4 x i64>, <4 x i64>) 16declare <8 x i32> @llvm.fshr.v8i32(<8 x i32>, <8 x i32>, <8 x i32>) 17declare <16 x i16> @llvm.fshr.v16i16(<16 x i16>, <16 x i16>, <16 x i16>) 18declare <32 x i8> @llvm.fshr.v32i8(<32 x i8>, <32 x i8>, <32 x i8>) 19 20; 21; Variable Shifts 22; 23 24define <4 x i64> @var_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %amt) nounwind { 25; AVX1-LABEL: var_funnnel_v4i64: 26; AVX1: # %bb.0: 27; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm3 = 
[63,63,63,63] 28; AVX1-NEXT: vandps %ymm3, %ymm2, %ymm4 29; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm5 30; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm6 31; AVX1-NEXT: vpsrlq %xmm5, %xmm6, %xmm7 32; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm5[2,3,2,3] 33; AVX1-NEXT: vpsrlq %xmm5, %xmm6, %xmm5 34; AVX1-NEXT: vpblendw {{.*#+}} xmm5 = xmm7[0,1,2,3],xmm5[4,5,6,7] 35; AVX1-NEXT: vpsrlq %xmm4, %xmm1, %xmm6 36; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,3,2,3] 37; AVX1-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 38; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm6[0,1,2,3],xmm1[4,5,6,7] 39; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm1, %ymm1 40; AVX1-NEXT: vandnps %ymm3, %ymm2, %ymm2 41; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3 42; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[2,3,2,3] 43; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5 44; AVX1-NEXT: vpaddq %xmm5, %xmm5, %xmm5 45; AVX1-NEXT: vpsllq %xmm4, %xmm5, %xmm4 46; AVX1-NEXT: vpsllq %xmm3, %xmm5, %xmm3 47; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm4[4,5,6,7] 48; AVX1-NEXT: vshufps {{.*#+}} xmm4 = xmm2[2,3,2,3] 49; AVX1-NEXT: vpaddq %xmm0, %xmm0, %xmm0 50; AVX1-NEXT: vpsllq %xmm4, %xmm0, %xmm4 51; AVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm0 52; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm4[4,5,6,7] 53; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 54; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 55; AVX1-NEXT: retq 56; 57; AVX2-LABEL: var_funnnel_v4i64: 58; AVX2: # %bb.0: 59; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63] 60; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm4 61; AVX2-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1 62; AVX2-NEXT: vpandn %ymm3, %ymm2, %ymm2 63; AVX2-NEXT: vpaddq %ymm0, %ymm0, %ymm0 64; AVX2-NEXT: vpsllvq %ymm2, %ymm0, %ymm0 65; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 66; AVX2-NEXT: retq 67; 68; AVX512F-LABEL: var_funnnel_v4i64: 69; AVX512F: # %bb.0: 70; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63] 71; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm4 72; AVX512F-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1 73; AVX512F-NEXT: vpandn %ymm3, %ymm2, %ymm2 74; 
AVX512F-NEXT: vpaddq %ymm0, %ymm0, %ymm0 75; AVX512F-NEXT: vpsllvq %ymm2, %ymm0, %ymm0 76; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0 77; AVX512F-NEXT: retq 78; 79; AVX512VL-LABEL: var_funnnel_v4i64: 80; AVX512VL: # %bb.0: 81; AVX512VL-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63] 82; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm4 83; AVX512VL-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1 84; AVX512VL-NEXT: vpandn %ymm3, %ymm2, %ymm2 85; AVX512VL-NEXT: vpaddq %ymm0, %ymm0, %ymm0 86; AVX512VL-NEXT: vpsllvq %ymm2, %ymm0, %ymm0 87; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0 88; AVX512VL-NEXT: retq 89; 90; AVX512BW-LABEL: var_funnnel_v4i64: 91; AVX512BW: # %bb.0: 92; AVX512BW-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63] 93; AVX512BW-NEXT: vpand %ymm3, %ymm2, %ymm4 94; AVX512BW-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1 95; AVX512BW-NEXT: vpandn %ymm3, %ymm2, %ymm2 96; AVX512BW-NEXT: vpaddq %ymm0, %ymm0, %ymm0 97; AVX512BW-NEXT: vpsllvq %ymm2, %ymm0, %ymm0 98; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0 99; AVX512BW-NEXT: retq 100; 101; AVX512VBMI2-LABEL: var_funnnel_v4i64: 102; AVX512VBMI2: # %bb.0: 103; AVX512VBMI2-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2 104; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 105; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 106; AVX512VBMI2-NEXT: vpshrdvq %zmm2, %zmm0, %zmm1 107; AVX512VBMI2-NEXT: vmovdqa %ymm1, %ymm0 108; AVX512VBMI2-NEXT: retq 109; 110; AVX512VLBW-LABEL: var_funnnel_v4i64: 111; AVX512VLBW: # %bb.0: 112; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63] 113; AVX512VLBW-NEXT: vpand %ymm3, %ymm2, %ymm4 114; AVX512VLBW-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1 115; AVX512VLBW-NEXT: vpandn %ymm3, %ymm2, %ymm2 116; AVX512VLBW-NEXT: vpaddq %ymm0, %ymm0, %ymm0 117; AVX512VLBW-NEXT: vpsllvq %ymm2, %ymm0, %ymm0 118; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 119; AVX512VLBW-NEXT: retq 120; 121; AVX10-LABEL: var_funnnel_v4i64: 122; AVX10: # %bb.0: 123; AVX10-NEXT: vpshrdvq %ymm2, %ymm0, %ymm1 124; AVX10-NEXT: vmovdqa %ymm1, 
%ymm0 125; AVX10-NEXT: retq 126; 127; XOPAVX1-LABEL: var_funnnel_v4i64: 128; XOPAVX1: # %bb.0: 129; XOPAVX1-NEXT: vbroadcastsd {{.*#+}} ymm3 = [63,63,63,63] 130; XOPAVX1-NEXT: vandps %ymm3, %ymm2, %ymm4 131; XOPAVX1-NEXT: vextractf128 $1, %ymm4, %xmm5 132; XOPAVX1-NEXT: vpxor %xmm6, %xmm6, %xmm6 133; XOPAVX1-NEXT: vpsubq %xmm5, %xmm6, %xmm5 134; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm7 135; XOPAVX1-NEXT: vpshlq %xmm5, %xmm7, %xmm5 136; XOPAVX1-NEXT: vpsubq %xmm4, %xmm6, %xmm4 137; XOPAVX1-NEXT: vpshlq %xmm4, %xmm1, %xmm1 138; XOPAVX1-NEXT: vinsertf128 $1, %xmm5, %ymm1, %ymm1 139; XOPAVX1-NEXT: vandnps %ymm3, %ymm2, %ymm2 140; XOPAVX1-NEXT: vextractf128 $1, %ymm2, %xmm3 141; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 142; XOPAVX1-NEXT: vpaddq %xmm4, %xmm4, %xmm4 143; XOPAVX1-NEXT: vpshlq %xmm3, %xmm4, %xmm3 144; XOPAVX1-NEXT: vpaddq %xmm0, %xmm0, %xmm0 145; XOPAVX1-NEXT: vpshlq %xmm2, %xmm0, %xmm0 146; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 147; XOPAVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 148; XOPAVX1-NEXT: retq 149; 150; XOPAVX2-LABEL: var_funnnel_v4i64: 151; XOPAVX2: # %bb.0: 152; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [63,63,63,63] 153; XOPAVX2-NEXT: vpand %ymm3, %ymm2, %ymm4 154; XOPAVX2-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1 155; XOPAVX2-NEXT: vpandn %ymm3, %ymm2, %ymm2 156; XOPAVX2-NEXT: vpaddq %ymm0, %ymm0, %ymm0 157; XOPAVX2-NEXT: vpsllvq %ymm2, %ymm0, %ymm0 158; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 159; XOPAVX2-NEXT: retq 160 %res = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %amt) 161 ret <4 x i64> %res 162} 163 164define <8 x i32> @var_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %amt) nounwind { 165; AVX1-LABEL: var_funnnel_v8i32: 166; AVX1: # %bb.0: 167; AVX1-NEXT: vbroadcastss {{.*#+}} ymm3 = [31,31,31,31,31,31,31,31] 168; AVX1-NEXT: vandps %ymm3, %ymm2, %ymm2 169; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4 170; AVX1-NEXT: vpsrldq {{.*#+}} xmm5 = 
xmm4[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 171; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm6 172; AVX1-NEXT: vpsrld %xmm5, %xmm6, %xmm5 173; AVX1-NEXT: vpsrlq $32, %xmm4, %xmm7 174; AVX1-NEXT: vpsrld %xmm7, %xmm6, %xmm7 175; AVX1-NEXT: vpblendw {{.*#+}} xmm5 = xmm7[0,1,2,3],xmm5[4,5,6,7] 176; AVX1-NEXT: vpxor %xmm7, %xmm7, %xmm7 177; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm8 = xmm4[2],xmm7[2],xmm4[3],xmm7[3] 178; AVX1-NEXT: vpsrld %xmm8, %xmm6, %xmm8 179; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm9 = xmm4[0],zero,xmm4[1],zero 180; AVX1-NEXT: vpsrld %xmm9, %xmm6, %xmm6 181; AVX1-NEXT: vpblendw {{.*#+}} xmm6 = xmm6[0,1,2,3],xmm8[4,5,6,7] 182; AVX1-NEXT: vpblendw {{.*#+}} xmm5 = xmm6[0,1],xmm5[2,3],xmm6[4,5],xmm5[6,7] 183; AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm4 184; AVX1-NEXT: vpslld $23, %xmm4, %xmm4 185; AVX1-NEXT: vbroadcastss {{.*#+}} xmm6 = [1065353216,1065353216,1065353216,1065353216] 186; AVX1-NEXT: vpaddd %xmm6, %xmm4, %xmm4 187; AVX1-NEXT: vcvttps2dq %xmm4, %xmm4 188; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm8 189; AVX1-NEXT: vpaddd %xmm8, %xmm8, %xmm8 190; AVX1-NEXT: vpmulld %xmm4, %xmm8, %xmm4 191; AVX1-NEXT: vpor %xmm5, %xmm4, %xmm4 192; AVX1-NEXT: vpsrldq {{.*#+}} xmm5 = xmm2[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 193; AVX1-NEXT: vpsrld %xmm5, %xmm1, %xmm5 194; AVX1-NEXT: vpsrlq $32, %xmm2, %xmm8 195; AVX1-NEXT: vpsrld %xmm8, %xmm1, %xmm8 196; AVX1-NEXT: vpblendw {{.*#+}} xmm5 = xmm8[0,1,2,3],xmm5[4,5,6,7] 197; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm7 = xmm2[2],xmm7[2],xmm2[3],xmm7[3] 198; AVX1-NEXT: vpsrld %xmm7, %xmm1, %xmm7 199; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm8 = xmm2[0],zero,xmm2[1],zero 200; AVX1-NEXT: vpsrld %xmm8, %xmm1, %xmm1 201; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm7[4,5,6,7] 202; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm5[2,3],xmm1[4,5],xmm5[6,7] 203; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2 204; AVX1-NEXT: vpslld $23, %xmm2, %xmm2 205; AVX1-NEXT: vpaddd %xmm6, %xmm2, %xmm2 206; 
AVX1-NEXT: vcvttps2dq %xmm2, %xmm2 207; AVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm0 208; AVX1-NEXT: vpmulld %xmm2, %xmm0, %xmm0 209; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 210; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0 211; AVX1-NEXT: retq 212; 213; AVX2-LABEL: var_funnnel_v8i32: 214; AVX2: # %bb.0: 215; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm3 = [31,31,31,31,31,31,31,31] 216; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm4 217; AVX2-NEXT: vpsrlvd %ymm4, %ymm1, %ymm1 218; AVX2-NEXT: vpandn %ymm3, %ymm2, %ymm2 219; AVX2-NEXT: vpaddd %ymm0, %ymm0, %ymm0 220; AVX2-NEXT: vpsllvd %ymm2, %ymm0, %ymm0 221; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 222; AVX2-NEXT: retq 223; 224; AVX512F-LABEL: var_funnnel_v8i32: 225; AVX512F: # %bb.0: 226; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm3 = [31,31,31,31,31,31,31,31] 227; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm4 228; AVX512F-NEXT: vpsrlvd %ymm4, %ymm1, %ymm1 229; AVX512F-NEXT: vpandn %ymm3, %ymm2, %ymm2 230; AVX512F-NEXT: vpaddd %ymm0, %ymm0, %ymm0 231; AVX512F-NEXT: vpsllvd %ymm2, %ymm0, %ymm0 232; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0 233; AVX512F-NEXT: retq 234; 235; AVX512VL-LABEL: var_funnnel_v8i32: 236; AVX512VL: # %bb.0: 237; AVX512VL-NEXT: vpbroadcastd {{.*#+}} ymm3 = [31,31,31,31,31,31,31,31] 238; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm4 239; AVX512VL-NEXT: vpsrlvd %ymm4, %ymm1, %ymm1 240; AVX512VL-NEXT: vpandn %ymm3, %ymm2, %ymm2 241; AVX512VL-NEXT: vpaddd %ymm0, %ymm0, %ymm0 242; AVX512VL-NEXT: vpsllvd %ymm2, %ymm0, %ymm0 243; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0 244; AVX512VL-NEXT: retq 245; 246; AVX512BW-LABEL: var_funnnel_v8i32: 247; AVX512BW: # %bb.0: 248; AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm3 = [31,31,31,31,31,31,31,31] 249; AVX512BW-NEXT: vpand %ymm3, %ymm2, %ymm4 250; AVX512BW-NEXT: vpsrlvd %ymm4, %ymm1, %ymm1 251; AVX512BW-NEXT: vpandn %ymm3, %ymm2, %ymm2 252; AVX512BW-NEXT: vpaddd %ymm0, %ymm0, %ymm0 253; AVX512BW-NEXT: vpsllvd %ymm2, %ymm0, %ymm0 254; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0 255; AVX512BW-NEXT: retq 256; 257; 
AVX512VBMI2-LABEL: var_funnnel_v8i32: 258; AVX512VBMI2: # %bb.0: 259; AVX512VBMI2-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2 260; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 261; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 262; AVX512VBMI2-NEXT: vpshrdvd %zmm2, %zmm0, %zmm1 263; AVX512VBMI2-NEXT: vmovdqa %ymm1, %ymm0 264; AVX512VBMI2-NEXT: retq 265; 266; AVX512VLBW-LABEL: var_funnnel_v8i32: 267; AVX512VLBW: # %bb.0: 268; AVX512VLBW-NEXT: vpbroadcastd {{.*#+}} ymm3 = [31,31,31,31,31,31,31,31] 269; AVX512VLBW-NEXT: vpand %ymm3, %ymm2, %ymm4 270; AVX512VLBW-NEXT: vpsrlvd %ymm4, %ymm1, %ymm1 271; AVX512VLBW-NEXT: vpandn %ymm3, %ymm2, %ymm2 272; AVX512VLBW-NEXT: vpaddd %ymm0, %ymm0, %ymm0 273; AVX512VLBW-NEXT: vpsllvd %ymm2, %ymm0, %ymm0 274; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 275; AVX512VLBW-NEXT: retq 276; 277; AVX10-LABEL: var_funnnel_v8i32: 278; AVX10: # %bb.0: 279; AVX10-NEXT: vpshrdvd %ymm2, %ymm0, %ymm1 280; AVX10-NEXT: vmovdqa %ymm1, %ymm0 281; AVX10-NEXT: retq 282; 283; XOPAVX1-LABEL: var_funnnel_v8i32: 284; XOPAVX1: # %bb.0: 285; XOPAVX1-NEXT: vbroadcastss {{.*#+}} ymm3 = [31,31,31,31,31,31,31,31] 286; XOPAVX1-NEXT: vandps %ymm3, %ymm2, %ymm2 287; XOPAVX1-NEXT: vextractf128 $1, %ymm2, %xmm4 288; XOPAVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5 289; XOPAVX1-NEXT: vpsubd %xmm4, %xmm5, %xmm6 290; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm7 291; XOPAVX1-NEXT: vpshld %xmm6, %xmm7, %xmm6 292; XOPAVX1-NEXT: vpxor %xmm3, %xmm4, %xmm4 293; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm7 294; XOPAVX1-NEXT: vpaddd %xmm7, %xmm7, %xmm7 295; XOPAVX1-NEXT: vpshld %xmm4, %xmm7, %xmm4 296; XOPAVX1-NEXT: vpor %xmm6, %xmm4, %xmm4 297; XOPAVX1-NEXT: vpsubd %xmm2, %xmm5, %xmm5 298; XOPAVX1-NEXT: vpshld %xmm5, %xmm1, %xmm1 299; XOPAVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2 300; XOPAVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm0 301; XOPAVX1-NEXT: vpshld %xmm2, %xmm0, %xmm0 302; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 303; XOPAVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0 
304; XOPAVX1-NEXT: retq 305; 306; XOPAVX2-LABEL: var_funnnel_v8i32: 307; XOPAVX2: # %bb.0: 308; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} ymm3 = [31,31,31,31,31,31,31,31] 309; XOPAVX2-NEXT: vpand %ymm3, %ymm2, %ymm4 310; XOPAVX2-NEXT: vpsrlvd %ymm4, %ymm1, %ymm1 311; XOPAVX2-NEXT: vpandn %ymm3, %ymm2, %ymm2 312; XOPAVX2-NEXT: vpaddd %ymm0, %ymm0, %ymm0 313; XOPAVX2-NEXT: vpsllvd %ymm2, %ymm0, %ymm0 314; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 315; XOPAVX2-NEXT: retq 316 %res = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %amt) 317 ret <8 x i32> %res 318} 319 320define <16 x i16> @var_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %amt) nounwind { 321; AVX1-LABEL: var_funnnel_v16i16: 322; AVX1: # %bb.0: 323; AVX1-NEXT: vbroadcastss {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 324; AVX1-NEXT: vandps %ymm3, %ymm2, %ymm2 325; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4 326; AVX1-NEXT: vpsllw $12, %xmm4, %xmm5 327; AVX1-NEXT: vpsllw $4, %xmm4, %xmm6 328; AVX1-NEXT: vpor %xmm5, %xmm6, %xmm5 329; AVX1-NEXT: vpaddw %xmm5, %xmm5, %xmm6 330; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm7 331; AVX1-NEXT: vpsrlw $8, %xmm7, %xmm8 332; AVX1-NEXT: vpblendvb %xmm5, %xmm8, %xmm7, %xmm5 333; AVX1-NEXT: vpsrlw $4, %xmm5, %xmm7 334; AVX1-NEXT: vpblendvb %xmm6, %xmm7, %xmm5, %xmm5 335; AVX1-NEXT: vpsrlw $2, %xmm5, %xmm7 336; AVX1-NEXT: vpaddw %xmm6, %xmm6, %xmm6 337; AVX1-NEXT: vpblendvb %xmm6, %xmm7, %xmm5, %xmm5 338; AVX1-NEXT: vpsrlw $1, %xmm5, %xmm7 339; AVX1-NEXT: vpaddw %xmm6, %xmm6, %xmm6 340; AVX1-NEXT: vpblendvb %xmm6, %xmm7, %xmm5, %xmm5 341; AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm6 342; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm4 = xmm6[4,4,5,5,6,6,7,7] 343; AVX1-NEXT: vpslld $23, %xmm4, %xmm7 344; AVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [1065353216,1065353216,1065353216,1065353216] 345; AVX1-NEXT: vpaddd %xmm4, %xmm7, %xmm7 346; AVX1-NEXT: vcvttps2dq %xmm7, %xmm7 347; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm6 = 
xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero 348; AVX1-NEXT: vpslld $23, %xmm6, %xmm6 349; AVX1-NEXT: vpaddd %xmm4, %xmm6, %xmm6 350; AVX1-NEXT: vcvttps2dq %xmm6, %xmm6 351; AVX1-NEXT: vpackusdw %xmm7, %xmm6, %xmm6 352; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm7 353; AVX1-NEXT: vpaddw %xmm7, %xmm7, %xmm7 354; AVX1-NEXT: vpmullw %xmm6, %xmm7, %xmm6 355; AVX1-NEXT: vpor %xmm5, %xmm6, %xmm5 356; AVX1-NEXT: vpsllw $12, %xmm2, %xmm6 357; AVX1-NEXT: vpsllw $4, %xmm2, %xmm7 358; AVX1-NEXT: vpor %xmm6, %xmm7, %xmm6 359; AVX1-NEXT: vpaddw %xmm6, %xmm6, %xmm7 360; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm8 361; AVX1-NEXT: vpblendvb %xmm6, %xmm8, %xmm1, %xmm1 362; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm6 363; AVX1-NEXT: vpblendvb %xmm7, %xmm6, %xmm1, %xmm1 364; AVX1-NEXT: vpsrlw $2, %xmm1, %xmm6 365; AVX1-NEXT: vpaddw %xmm7, %xmm7, %xmm7 366; AVX1-NEXT: vpblendvb %xmm7, %xmm6, %xmm1, %xmm1 367; AVX1-NEXT: vpsrlw $1, %xmm1, %xmm6 368; AVX1-NEXT: vpaddw %xmm7, %xmm7, %xmm7 369; AVX1-NEXT: vpblendvb %xmm7, %xmm6, %xmm1, %xmm1 370; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2 371; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm2[4,4,5,5,6,6,7,7] 372; AVX1-NEXT: vpslld $23, %xmm3, %xmm3 373; AVX1-NEXT: vpaddd %xmm4, %xmm3, %xmm3 374; AVX1-NEXT: vcvttps2dq %xmm3, %xmm3 375; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero 376; AVX1-NEXT: vpslld $23, %xmm2, %xmm2 377; AVX1-NEXT: vpaddd %xmm4, %xmm2, %xmm2 378; AVX1-NEXT: vcvttps2dq %xmm2, %xmm2 379; AVX1-NEXT: vpackusdw %xmm3, %xmm2, %xmm2 380; AVX1-NEXT: vpaddw %xmm0, %xmm0, %xmm0 381; AVX1-NEXT: vpmullw %xmm2, %xmm0, %xmm0 382; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 383; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm0 384; AVX1-NEXT: retq 385; 386; AVX2-LABEL: var_funnnel_v16i16: 387; AVX2: # %bb.0: 388; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15] 389; AVX2-NEXT: vpand 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 390; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4 391; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm5 = ymm2[4],ymm4[4],ymm2[5],ymm4[5],ymm2[6],ymm4[6],ymm2[7],ymm4[7],ymm2[12],ymm4[12],ymm2[13],ymm4[13],ymm2[14],ymm4[14],ymm2[15],ymm4[15] 392; AVX2-NEXT: vpsrlvd %ymm5, %ymm3, %ymm3 393; AVX2-NEXT: vpblendw {{.*#+}} ymm3 = ymm3[0],ymm4[1],ymm3[2],ymm4[3],ymm3[4],ymm4[5],ymm3[6],ymm4[7],ymm3[8],ymm4[9],ymm3[10],ymm4[11],ymm3[12],ymm4[13],ymm3[14],ymm4[15] 394; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11] 395; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm2[0],ymm4[0],ymm2[1],ymm4[1],ymm2[2],ymm4[2],ymm2[3],ymm4[3],ymm2[8],ymm4[8],ymm2[9],ymm4[9],ymm2[10],ymm4[10],ymm2[11],ymm4[11] 396; AVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 397; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm4[1],ymm0[2],ymm4[3],ymm0[4],ymm4[5],ymm0[6],ymm4[7],ymm0[8],ymm4[9],ymm0[10],ymm4[11],ymm0[12],ymm4[13],ymm0[14],ymm4[15] 398; AVX2-NEXT: vpackusdw %ymm3, %ymm0, %ymm0 399; AVX2-NEXT: retq 400; 401; AVX512F-LABEL: var_funnnel_v16i16: 402; AVX512F: # %bb.0: 403; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero 404; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero 405; AVX512F-NEXT: vpslld $16, %zmm0, %zmm0 406; AVX512F-NEXT: vpord %zmm1, %zmm0, %zmm0 407; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm1 408; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = 
ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero 409; AVX512F-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 410; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 411; AVX512F-NEXT: retq 412; 413; AVX512VL-LABEL: var_funnnel_v16i16: 414; AVX512VL: # %bb.0: 415; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero 416; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero 417; AVX512VL-NEXT: vpslld $16, %zmm0, %zmm0 418; AVX512VL-NEXT: vpord %zmm1, %zmm0, %zmm0 419; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm1 420; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero 421; AVX512VL-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 422; AVX512VL-NEXT: vpmovdw %zmm0, %ymm0 423; AVX512VL-NEXT: retq 424; 425; AVX512BW-LABEL: var_funnnel_v16i16: 426; AVX512BW: # %bb.0: 427; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 428; AVX512BW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 429; AVX512BW-NEXT: vpand %ymm3, %ymm2, %ymm4 430; AVX512BW-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1 431; AVX512BW-NEXT: vpandn %ymm3, %ymm2, %ymm2 432; AVX512BW-NEXT: vpaddw %ymm0, %ymm0, %ymm0 433; AVX512BW-NEXT: vpsllvw %zmm2, %zmm0, %zmm0 434; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0 435; AVX512BW-NEXT: retq 436; 
437; AVX512VBMI2-LABEL: var_funnnel_v16i16: 438; AVX512VBMI2: # %bb.0: 439; AVX512VBMI2-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2 440; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 441; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 442; AVX512VBMI2-NEXT: vpshrdvw %zmm2, %zmm0, %zmm1 443; AVX512VBMI2-NEXT: vmovdqa %ymm1, %ymm0 444; AVX512VBMI2-NEXT: retq 445; 446; AVX512VLBW-LABEL: var_funnnel_v16i16: 447; AVX512VLBW: # %bb.0: 448; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 449; AVX512VLBW-NEXT: vpand %ymm3, %ymm2, %ymm4 450; AVX512VLBW-NEXT: vpsrlvw %ymm4, %ymm1, %ymm1 451; AVX512VLBW-NEXT: vpandn %ymm3, %ymm2, %ymm2 452; AVX512VLBW-NEXT: vpaddw %ymm0, %ymm0, %ymm0 453; AVX512VLBW-NEXT: vpsllvw %ymm2, %ymm0, %ymm0 454; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 455; AVX512VLBW-NEXT: retq 456; 457; AVX10-LABEL: var_funnnel_v16i16: 458; AVX10: # %bb.0: 459; AVX10-NEXT: vpshrdvw %ymm2, %ymm0, %ymm1 460; AVX10-NEXT: vmovdqa %ymm1, %ymm0 461; AVX10-NEXT: retq 462; 463; XOPAVX1-LABEL: var_funnnel_v16i16: 464; XOPAVX1: # %bb.0: 465; XOPAVX1-NEXT: vbroadcastss {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 466; XOPAVX1-NEXT: vandps %ymm3, %ymm2, %ymm2 467; XOPAVX1-NEXT: vextractf128 $1, %ymm2, %xmm4 468; XOPAVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5 469; XOPAVX1-NEXT: vpsubw %xmm4, %xmm5, %xmm6 470; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm7 471; XOPAVX1-NEXT: vpshlw %xmm6, %xmm7, %xmm6 472; XOPAVX1-NEXT: vpxor %xmm3, %xmm4, %xmm4 473; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm7 474; XOPAVX1-NEXT: vpaddw %xmm7, %xmm7, %xmm7 475; XOPAVX1-NEXT: vpshlw %xmm4, %xmm7, %xmm4 476; XOPAVX1-NEXT: vpor %xmm6, %xmm4, %xmm4 477; XOPAVX1-NEXT: vpsubw %xmm2, %xmm5, %xmm5 478; XOPAVX1-NEXT: vpshlw %xmm5, %xmm1, %xmm1 479; XOPAVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2 480; XOPAVX1-NEXT: vpaddw %xmm0, %xmm0, %xmm0 481; XOPAVX1-NEXT: vpshlw %xmm2, %xmm0, %xmm0 482; XOPAVX1-NEXT: vpor %xmm1, %xmm0, 
%xmm0 483; XOPAVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0 484; XOPAVX1-NEXT: retq 485; 486; XOPAVX2-LABEL: var_funnnel_v16i16: 487; XOPAVX2: # %bb.0: 488; XOPAVX2-NEXT: vpbroadcastw {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 489; XOPAVX2-NEXT: vpandn %ymm3, %ymm2, %ymm4 490; XOPAVX2-NEXT: vextracti128 $1, %ymm4, %xmm5 491; XOPAVX2-NEXT: vpaddw %ymm0, %ymm0, %ymm0 492; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm6 493; XOPAVX2-NEXT: vpshlw %xmm5, %xmm6, %xmm5 494; XOPAVX2-NEXT: vpshlw %xmm4, %xmm0, %xmm0 495; XOPAVX2-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm0 496; XOPAVX2-NEXT: vpand %ymm3, %ymm2, %ymm2 497; XOPAVX2-NEXT: vextracti128 $1, %ymm2, %xmm3 498; XOPAVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4 499; XOPAVX2-NEXT: vpsubw %xmm3, %xmm4, %xmm3 500; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm5 501; XOPAVX2-NEXT: vpshlw %xmm3, %xmm5, %xmm3 502; XOPAVX2-NEXT: vpsubw %xmm2, %xmm4, %xmm2 503; XOPAVX2-NEXT: vpshlw %xmm2, %xmm1, %xmm1 504; XOPAVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 505; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 506; XOPAVX2-NEXT: retq 507 %res = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %amt) 508 ret <16 x i16> %res 509} 510 511define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %amt) nounwind { 512; AVX1-LABEL: var_funnnel_v32i8: 513; AVX1: # %bb.0: 514; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5 515; AVX1-NEXT: vpsrlw $4, %xmm5, %xmm3 516; AVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 517; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm6 518; AVX1-NEXT: vbroadcastss {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] 519; AVX1-NEXT: vandps %ymm3, %ymm2, %ymm2 520; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm7 521; AVX1-NEXT: vpsllw $5, %xmm7, %xmm8 522; AVX1-NEXT: vpblendvb %xmm8, %xmm6, %xmm5, %xmm6 523; AVX1-NEXT: vpsrlw $2, %xmm6, %xmm9 524; AVX1-NEXT: vbroadcastss {{.*#+}} xmm5 = 
[63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63] 525; AVX1-NEXT: vpand %xmm5, %xmm9, %xmm9 526; AVX1-NEXT: vpaddb %xmm8, %xmm8, %xmm8 527; AVX1-NEXT: vpblendvb %xmm8, %xmm9, %xmm6, %xmm9 528; AVX1-NEXT: vpsrlw $1, %xmm9, %xmm10 529; AVX1-NEXT: vbroadcastss {{.*#+}} xmm6 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] 530; AVX1-NEXT: vpand %xmm6, %xmm10, %xmm10 531; AVX1-NEXT: vpaddb %xmm8, %xmm8, %xmm8 532; AVX1-NEXT: vpblendvb %xmm8, %xmm10, %xmm9, %xmm8 533; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm9 534; AVX1-NEXT: vpaddb %xmm9, %xmm9, %xmm9 535; AVX1-NEXT: vpsllw $4, %xmm9, %xmm10 536; AVX1-NEXT: vbroadcastss {{.*#+}} xmm11 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] 537; AVX1-NEXT: vpand %xmm11, %xmm10, %xmm10 538; AVX1-NEXT: vpxor %xmm3, %xmm7, %xmm7 539; AVX1-NEXT: vpsllw $5, %xmm7, %xmm7 540; AVX1-NEXT: vpblendvb %xmm7, %xmm10, %xmm9, %xmm9 541; AVX1-NEXT: vpsllw $2, %xmm9, %xmm10 542; AVX1-NEXT: vbroadcastss {{.*#+}} xmm12 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252] 543; AVX1-NEXT: vpand %xmm12, %xmm10, %xmm10 544; AVX1-NEXT: vpaddb %xmm7, %xmm7, %xmm7 545; AVX1-NEXT: vpblendvb %xmm7, %xmm10, %xmm9, %xmm9 546; AVX1-NEXT: vpaddb %xmm9, %xmm9, %xmm10 547; AVX1-NEXT: vpaddb %xmm7, %xmm7, %xmm7 548; AVX1-NEXT: vpblendvb %xmm7, %xmm10, %xmm9, %xmm7 549; AVX1-NEXT: vpor %xmm7, %xmm8, %xmm7 550; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm8 551; AVX1-NEXT: vpand %xmm4, %xmm8, %xmm4 552; AVX1-NEXT: vpsllw $5, %xmm2, %xmm8 553; AVX1-NEXT: vpblendvb %xmm8, %xmm4, %xmm1, %xmm1 554; AVX1-NEXT: vpsrlw $2, %xmm1, %xmm4 555; AVX1-NEXT: vpand %xmm5, %xmm4, %xmm4 556; AVX1-NEXT: vpaddb %xmm8, %xmm8, %xmm5 557; AVX1-NEXT: vpblendvb %xmm5, %xmm4, %xmm1, %xmm1 558; AVX1-NEXT: vpsrlw $1, %xmm1, %xmm4 559; AVX1-NEXT: vpand %xmm6, %xmm4, %xmm4 560; AVX1-NEXT: vpaddb %xmm5, %xmm5, %xmm5 561; AVX1-NEXT: vpblendvb %xmm5, %xmm4, %xmm1, %xmm1 562; AVX1-NEXT: vpaddb %xmm0, %xmm0, %xmm0 563; AVX1-NEXT: vpsllw $4, %xmm0, %xmm4 564; 
AVX1-NEXT: vpand %xmm4, %xmm11, %xmm4 565; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2 566; AVX1-NEXT: vpsllw $5, %xmm2, %xmm2 567; AVX1-NEXT: vpblendvb %xmm2, %xmm4, %xmm0, %xmm0 568; AVX1-NEXT: vpsllw $2, %xmm0, %xmm3 569; AVX1-NEXT: vpand %xmm3, %xmm12, %xmm3 570; AVX1-NEXT: vpaddb %xmm2, %xmm2, %xmm2 571; AVX1-NEXT: vpblendvb %xmm2, %xmm3, %xmm0, %xmm0 572; AVX1-NEXT: vpaddb %xmm0, %xmm0, %xmm3 573; AVX1-NEXT: vpaddb %xmm2, %xmm2, %xmm2 574; AVX1-NEXT: vpblendvb %xmm2, %xmm3, %xmm0, %xmm0 575; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 576; AVX1-NEXT: vinsertf128 $1, %xmm7, %ymm0, %ymm0 577; AVX1-NEXT: retq 578; 579; AVX2-LABEL: var_funnnel_v32i8: 580; AVX2: # %bb.0: 581; AVX2-NEXT: vpbroadcastb {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] 582; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm4 583; AVX2-NEXT: vpsllw $5, %ymm4, %ymm4 584; AVX2-NEXT: vpaddb %ymm4, %ymm4, %ymm5 585; AVX2-NEXT: vpsrlw $4, %ymm1, %ymm6 586; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm6, %ymm6 587; AVX2-NEXT: vpblendvb %ymm4, %ymm6, %ymm1, %ymm1 588; AVX2-NEXT: vpsrlw $2, %ymm1, %ymm4 589; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 590; AVX2-NEXT: vpblendvb %ymm5, %ymm4, %ymm1, %ymm1 591; AVX2-NEXT: vpsrlw $1, %ymm1, %ymm4 592; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 593; AVX2-NEXT: vpaddb %ymm5, %ymm5, %ymm5 594; AVX2-NEXT: vpblendvb %ymm5, %ymm4, %ymm1, %ymm1 595; AVX2-NEXT: vpandn %ymm3, %ymm2, %ymm2 596; AVX2-NEXT: vpsllw $5, %ymm2, %ymm2 597; AVX2-NEXT: vpaddb %ymm2, %ymm2, %ymm3 598; AVX2-NEXT: vpaddb %ymm0, %ymm0, %ymm0 599; AVX2-NEXT: vpsllw $4, %ymm0, %ymm4 600; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 601; AVX2-NEXT: vpblendvb %ymm2, %ymm4, %ymm0, %ymm0 602; AVX2-NEXT: vpsllw $2, %ymm0, %ymm2 603; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 604; AVX2-NEXT: vpblendvb %ymm3, %ymm2, %ymm0, %ymm0 605; AVX2-NEXT: vpaddb %ymm0, %ymm0, %ymm2 606; AVX2-NEXT: vpaddb %ymm3, %ymm3, %ymm3 
607; AVX2-NEXT: vpblendvb %ymm3, %ymm2, %ymm0, %ymm0 608; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 609; AVX2-NEXT: retq 610; 611; AVX512F-LABEL: var_funnnel_v32i8: 612; AVX512F: # %bb.0: 613; AVX512F-NEXT: vpbroadcastb {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] 614; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm4 615; AVX512F-NEXT: vpsllw $5, %ymm4, %ymm4 616; AVX512F-NEXT: vpaddb %ymm4, %ymm4, %ymm5 617; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm6 618; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm6, %ymm6 619; AVX512F-NEXT: vpblendvb %ymm4, %ymm6, %ymm1, %ymm1 620; AVX512F-NEXT: vpsrlw $2, %ymm1, %ymm4 621; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 622; AVX512F-NEXT: vpblendvb %ymm5, %ymm4, %ymm1, %ymm1 623; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm4 624; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 625; AVX512F-NEXT: vpaddb %ymm5, %ymm5, %ymm5 626; AVX512F-NEXT: vpblendvb %ymm5, %ymm4, %ymm1, %ymm1 627; AVX512F-NEXT: vpandn %ymm3, %ymm2, %ymm2 628; AVX512F-NEXT: vpsllw $5, %ymm2, %ymm2 629; AVX512F-NEXT: vpaddb %ymm2, %ymm2, %ymm3 630; AVX512F-NEXT: vpaddb %ymm0, %ymm0, %ymm0 631; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm4 632; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 633; AVX512F-NEXT: vpblendvb %ymm2, %ymm4, %ymm0, %ymm0 634; AVX512F-NEXT: vpsllw $2, %ymm0, %ymm2 635; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 636; AVX512F-NEXT: vpblendvb %ymm3, %ymm2, %ymm0, %ymm0 637; AVX512F-NEXT: vpaddb %ymm0, %ymm0, %ymm2 638; AVX512F-NEXT: vpaddb %ymm3, %ymm3, %ymm3 639; AVX512F-NEXT: vpblendvb %ymm3, %ymm2, %ymm0, %ymm0 640; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0 641; AVX512F-NEXT: retq 642; 643; AVX512VL-LABEL: var_funnnel_v32i8: 644; AVX512VL: # %bb.0: 645; AVX512VL-NEXT: vpbroadcastd {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] 646; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm4 647; AVX512VL-NEXT: vpsllw $5, %ymm4, %ymm4 648; 
AVX512VL-NEXT: vpaddb %ymm4, %ymm4, %ymm5 649; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm6 650; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm6, %ymm6 651; AVX512VL-NEXT: vpblendvb %ymm4, %ymm6, %ymm1, %ymm1 652; AVX512VL-NEXT: vpsrlw $2, %ymm1, %ymm4 653; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm4, %ymm4 654; AVX512VL-NEXT: vpblendvb %ymm5, %ymm4, %ymm1, %ymm1 655; AVX512VL-NEXT: vpsrlw $1, %ymm1, %ymm4 656; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm4, %ymm4 657; AVX512VL-NEXT: vpaddb %ymm5, %ymm5, %ymm5 658; AVX512VL-NEXT: vpblendvb %ymm5, %ymm4, %ymm1, %ymm1 659; AVX512VL-NEXT: vpandn %ymm3, %ymm2, %ymm2 660; AVX512VL-NEXT: vpsllw $5, %ymm2, %ymm2 661; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm3 662; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm0 663; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm4 664; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm4, %ymm4 665; AVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm0, %ymm0 666; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm2 667; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm2 668; AVX512VL-NEXT: vpblendvb %ymm3, %ymm2, %ymm0, %ymm0 669; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm2 670; AVX512VL-NEXT: vpaddb %ymm3, %ymm3, %ymm3 671; AVX512VL-NEXT: vpblendvb %ymm3, %ymm2, %ymm0, %ymm0 672; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0 673; AVX512VL-NEXT: retq 674; 675; AVX512BW-LABEL: var_funnnel_v32i8: 676; AVX512BW: # %bb.0: 677; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero 678; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = 
ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 679; AVX512BW-NEXT: vpsllw $8, %zmm0, %zmm0 680; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 681; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm1 682; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero 683; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 684; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 685; AVX512BW-NEXT: retq 686; 687; AVX512VBMI2-LABEL: var_funnnel_v32i8: 688; AVX512VBMI2: # %bb.0: 689; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 690; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 691; AVX512VBMI2-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,64,1,65,2,66,3,67,4,68,5,69,6,70,7,71,8,72,9,73,10,74,11,75,12,76,13,77,14,78,15,79,16,80,17,81,18,82,19,83,20,84,21,85,22,86,23,87,24,88,25,89,26,90,27,91,28,92,29,93,30,94,31,95] 692; AVX512VBMI2-NEXT: vpermi2b %zmm0, %zmm1, %zmm3 693; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm0 694; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} zmm0 = 
ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 695; AVX512VBMI2-NEXT: vpsrlvw %zmm0, %zmm3, %zmm0 696; AVX512VBMI2-NEXT: vpmovwb %zmm0, %ymm0 697; AVX512VBMI2-NEXT: retq 698; 699; AVX512VLBW-LABEL: var_funnnel_v32i8: 700; AVX512VLBW: # %bb.0: 701; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero 702; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 703; AVX512VLBW-NEXT: vpsllw $8, %zmm0, %zmm0 704; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0 705; AVX512VLBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm1 706; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm1 = 
ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero 707; AVX512VLBW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 708; AVX512VLBW-NEXT: vpmovwb %zmm0, %ymm0 709; AVX512VLBW-NEXT: retq 710; 711; AVX512VLVBMI2-LABEL: var_funnnel_v32i8: 712; AVX512VLVBMI2: # %bb.0: 713; AVX512VLVBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 714; AVX512VLVBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 715; AVX512VLVBMI2-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,64,1,65,2,66,3,67,4,68,5,69,6,70,7,71,8,72,9,73,10,74,11,75,12,76,13,77,14,78,15,79,16,80,17,81,18,82,19,83,20,84,21,85,22,86,23,87,24,88,25,89,26,90,27,91,28,92,29,93,30,94,31,95] 716; AVX512VLVBMI2-NEXT: vpermi2b %zmm0, %zmm1, %zmm3 717; AVX512VLVBMI2-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm0 718; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 719; AVX512VLVBMI2-NEXT: vpsrlvw %zmm0, %zmm3, %zmm0 720; AVX512VLVBMI2-NEXT: vpmovwb %zmm0, %ymm0 721; AVX512VLVBMI2-NEXT: retq 722; 723; AVX10_256-LABEL: var_funnnel_v32i8: 724; AVX10_256: # %bb.0: 725; AVX10_256-NEXT: vpunpckhbw {{.*#+}} ymm3 = 
ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31] 726; AVX10_256-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm2 727; AVX10_256-NEXT: vpxor %xmm4, %xmm4, %xmm4 728; AVX10_256-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm2[8],ymm4[8],ymm2[9],ymm4[9],ymm2[10],ymm4[10],ymm2[11],ymm4[11],ymm2[12],ymm4[12],ymm2[13],ymm4[13],ymm2[14],ymm4[14],ymm2[15],ymm4[15],ymm2[24],ymm4[24],ymm2[25],ymm4[25],ymm2[26],ymm4[26],ymm2[27],ymm4[27],ymm2[28],ymm4[28],ymm2[29],ymm4[29],ymm2[30],ymm4[30],ymm2[31],ymm4[31] 729; AVX10_256-NEXT: vpsrlvw %ymm5, %ymm3, %ymm3 730; AVX10_256-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23] 731; AVX10_256-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm2[0],ymm4[0],ymm2[1],ymm4[1],ymm2[2],ymm4[2],ymm2[3],ymm4[3],ymm2[4],ymm4[4],ymm2[5],ymm4[5],ymm2[6],ymm4[6],ymm2[7],ymm4[7],ymm2[16],ymm4[16],ymm2[17],ymm4[17],ymm2[18],ymm4[18],ymm2[19],ymm4[19],ymm2[20],ymm4[20],ymm2[21],ymm4[21],ymm2[22],ymm4[22],ymm2[23],ymm4[23] 732; AVX10_256-NEXT: vpsrlvw %ymm1, %ymm0, %ymm1 733; AVX10_256-NEXT: vmovdqa {{.*#+}} ymm0 = [0,2,4,6,8,10,12,14,32,34,36,38,40,42,44,46,16,18,20,22,24,26,28,30,48,50,52,54,56,58,60,62] 734; AVX10_256-NEXT: vpermi2b %ymm3, %ymm1, %ymm0 735; AVX10_256-NEXT: retq 736; 737; XOPAVX1-LABEL: var_funnnel_v32i8: 738; XOPAVX1: # %bb.0: 739; XOPAVX1-NEXT: vbroadcastss {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] 740; XOPAVX1-NEXT: vandps %ymm3, %ymm2, %ymm2 741; XOPAVX1-NEXT: vextractf128 $1, %ymm2, %xmm4 742; XOPAVX1-NEXT: vpxor 
%xmm5, %xmm5, %xmm5
; XOPAVX1-NEXT:    vpsubb %xmm4, %xmm5, %xmm6
; XOPAVX1-NEXT:    vextractf128 $1, %ymm1, %xmm7
; XOPAVX1-NEXT:    vpshlb %xmm6, %xmm7, %xmm6
; XOPAVX1-NEXT:    vpxor %xmm3, %xmm4, %xmm4
; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm7
; XOPAVX1-NEXT:    vpaddb %xmm7, %xmm7, %xmm7
; XOPAVX1-NEXT:    vpshlb %xmm4, %xmm7, %xmm4
; XOPAVX1-NEXT:    vpor %xmm6, %xmm4, %xmm4
; XOPAVX1-NEXT:    vpsubb %xmm2, %xmm5, %xmm5
; XOPAVX1-NEXT:    vpshlb %xmm5, %xmm1, %xmm1
; XOPAVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
; XOPAVX1-NEXT:    vpaddb %xmm0, %xmm0, %xmm0
; XOPAVX1-NEXT:    vpshlb %xmm2, %xmm0, %xmm0
; XOPAVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: var_funnnel_v32i8:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vpbroadcastb {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; XOPAVX2-NEXT:    vpand %ymm3, %ymm2, %ymm2
; XOPAVX2-NEXT:    vextracti128 $1, %ymm2, %xmm4
; XOPAVX2-NEXT:    vpxor %xmm5, %xmm5, %xmm5
; XOPAVX2-NEXT:    vpsubb %xmm4, %xmm5, %xmm6
; XOPAVX2-NEXT:    vextracti128 $1, %ymm1, %xmm7
; XOPAVX2-NEXT:    vpshlb %xmm6, %xmm7, %xmm6
; XOPAVX2-NEXT:    vpsubb %xmm2, %xmm5, %xmm5
; XOPAVX2-NEXT:    vpshlb %xmm5, %xmm1, %xmm1
; XOPAVX2-NEXT:    vinserti128 $1, %xmm6, %ymm1, %ymm1
; XOPAVX2-NEXT:    vpxor %xmm3, %xmm4, %xmm4
; XOPAVX2-NEXT:    vextracti128 $1, %ymm0, %xmm5
; XOPAVX2-NEXT:    vpaddb %xmm5, %xmm5, %xmm5
; XOPAVX2-NEXT:    vpshlb %xmm4, %xmm5, %xmm4
; XOPAVX2-NEXT:    vpxor %xmm3, %xmm2, %xmm2
; XOPAVX2-NEXT:    vpaddb %xmm0, %xmm0, %xmm0
; XOPAVX2-NEXT:    vpshlb %xmm2, %xmm0, %xmm0
; XOPAVX2-NEXT:    vinserti128 $1, %xmm4, %ymm0, %ymm0
; XOPAVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT:    retq
  %res = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %amt)
  ret <32 x i8> %res
}

;
; Uniform Variable Shifts
;

; Funnel shift right on <4 x i64> where every lane uses the same shift amount.
; Element 0 of %amt is broadcast to all four lanes by the shufflevector in the
; function body and the result feeds the amount operand of llvm.fshr.v4i64.
define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %amt) nounwind {
; AVX1-LABEL: splatvar_funnnel_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovsxbq {{.*#+}} xmm3 = [63,63]
; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm4
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
; AVX1-NEXT:    vpsrlq %xmm4, %xmm5, %xmm5
; AVX1-NEXT:    vpsrlq %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm1, %ymm1
; AVX1-NEXT:    vpandn %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpaddq %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpsllq %xmm2, %xmm3, %xmm3
; AVX1-NEXT:    vpaddq %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    vpsllq %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: splatvar_funnnel_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxbq {{.*#+}} xmm3 = [63,63]
; AVX2-NEXT:    vpand %xmm3, %xmm2, %xmm4
; AVX2-NEXT:    vpsrlq %xmm4, %ymm1, %ymm1
; AVX2-NEXT:    vpandn %xmm3, %xmm2, %xmm2
; AVX2-NEXT:    vpaddq %ymm0, %ymm0, %ymm0
; AVX2-NEXT:    vpsllq %xmm2, %ymm0, %ymm0
; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: splatvar_funnnel_v4i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpmovsxbq {{.*#+}} xmm3 = [63,63]
; AVX512F-NEXT:    vpand %xmm3, %xmm2, %xmm4
; AVX512F-NEXT:    vpsrlq %xmm4, %ymm1, %ymm1
; AVX512F-NEXT:    vpandn %xmm3, %xmm2, %xmm2
; AVX512F-NEXT:    vpaddq %ymm0, %ymm0, %ymm0
; AVX512F-NEXT:    vpsllq %xmm2, %ymm0, %ymm0
; AVX512F-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: splatvar_funnnel_v4i64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [63,63]
; AVX512VL-NEXT:    vpand %xmm3, %xmm2, %xmm4
; AVX512VL-NEXT:    vpsrlq %xmm4, %ymm1, %ymm1
; AVX512VL-NEXT:    vpandn %xmm3, %xmm2, %xmm2
; AVX512VL-NEXT:    vpaddq %ymm0, %ymm0, %ymm0
; AVX512VL-NEXT:    vpsllq %xmm2, %ymm0, %ymm0
; AVX512VL-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: splatvar_funnnel_v4i64:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpmovsxbq {{.*#+}} xmm3 = [63,63]
; AVX512BW-NEXT:    vpand %xmm3, %xmm2, %xmm4
; AVX512BW-NEXT:    vpsrlq %xmm4, %ymm1, %ymm1
; AVX512BW-NEXT:    vpandn %xmm3, %xmm2, %xmm2
; AVX512BW-NEXT:    vpaddq %ymm0, %ymm0, %ymm0
; AVX512BW-NEXT:    vpsllq %xmm2, %ymm0, %ymm0
; AVX512BW-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT:    retq
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v4i64:
; AVX512VBMI2:       # %bb.0:
; AVX512VBMI2-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512VBMI2-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512VBMI2-NEXT:    vpbroadcastq %xmm2, %ymm2
; AVX512VBMI2-NEXT:    vpshrdvq %zmm2, %zmm0, %zmm1
; AVX512VBMI2-NEXT:    vmovdqa %ymm1, %ymm0
; AVX512VBMI2-NEXT:    retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v4i64:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [63,63]
; AVX512VLBW-NEXT:    vpand %xmm3, %xmm2, %xmm4
; AVX512VLBW-NEXT:    vpsrlq %xmm4, %ymm1, %ymm1
; AVX512VLBW-NEXT:    vpandn %xmm3, %xmm2, %xmm2
; AVX512VLBW-NEXT:    vpaddq %ymm0, %ymm0, %ymm0
; AVX512VLBW-NEXT:    vpsllq %xmm2, %ymm0, %ymm0
; AVX512VLBW-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT:    retq
;
; AVX10-LABEL: splatvar_funnnel_v4i64:
; AVX10:       # %bb.0:
; AVX10-NEXT:    vpbroadcastq %xmm2, %ymm2
; AVX10-NEXT:    vpshrdvq %ymm2, %ymm0, %ymm1
; AVX10-NEXT:    vmovdqa %ymm1, %ymm0
; AVX10-NEXT:    retq
;
; XOPAVX1-LABEL: splatvar_funnnel_v4i64:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vpmovsxbq {{.*#+}} xmm3 = [63,63]
; XOPAVX1-NEXT:    vpand %xmm3, %xmm2, %xmm4
; XOPAVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
; XOPAVX1-NEXT:    vpsrlq %xmm4, %xmm5, %xmm5
; XOPAVX1-NEXT:    vpsrlq %xmm4, %xmm1, %xmm1
; XOPAVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm1, %ymm1
; XOPAVX1-NEXT:    vpandn %xmm3, %xmm2, %xmm2
; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; XOPAVX1-NEXT:    vpaddq %xmm3, %xmm3, %xmm3
; XOPAVX1-NEXT:    vpsllq %xmm2, %xmm3, %xmm3
; XOPAVX1-NEXT:    vpaddq %xmm0, %xmm0, %xmm0
; XOPAVX1-NEXT:    vpsllq %xmm2, %xmm0, %xmm0
; XOPAVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; XOPAVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: splatvar_funnnel_v4i64:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vpmovsxbq {{.*#+}} xmm3 = [63,63]
; XOPAVX2-NEXT:    vpand %xmm3, %xmm2, %xmm4
; XOPAVX2-NEXT:    vpsrlq %xmm4, %ymm1, %ymm1
; XOPAVX2-NEXT:    vpandn %xmm3, %xmm2, %xmm2
; XOPAVX2-NEXT:    vpaddq %ymm0, %ymm0, %ymm0
; XOPAVX2-NEXT:    vpsllq %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT:    retq
; The all-zero mask reads only element 0 of the first operand, so the second
; operand is a don't-care; poison is used for it instead of the deprecated undef.
  %splat = shufflevector <4 x i64> %amt, <4 x i64> poison, <4 x i32> zeroinitializer
  %res = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %splat)
  ret <4 x i64> %res
}

; Funnel shift right on <8 x i32> where every lane uses the same shift amount
; (element 0 of %amt, broadcast by the shufflevector in the function body).
define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %amt) nounwind {
; AVX1-LABEL: splatvar_funnnel_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm5 = xmm4[2],xmm3[2],xmm4[3],xmm3[3]
; AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX1-NEXT:    vpsrlq %xmm2, %xmm5, %xmm5
; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm6 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; AVX1-NEXT:    vpsrlq %xmm2, %xmm6, %xmm6
; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm6, %ymm5
; AVX1-NEXT:    vpunpckldq {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
; AVX1-NEXT:    vpsrlq %xmm2, %xmm3, %xmm3
; AVX1-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; AVX1-NEXT:    vpsrlq %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm5[0,2],ymm0[4,6],ymm5[4,6]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: splatvar_funnnel_v8i32:
; AVX2:       # %bb.0:
;
AVX2-NEXT:    vpunpckhdq {{.*#+}} ymm3 = ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[6],ymm0[6],ymm1[7],ymm0[7]
; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX2-NEXT:    vpsrlq %xmm2, %ymm3, %ymm3
; AVX2-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
; AVX2-NEXT:    vpsrlq %xmm2, %ymm0, %ymm0
; AVX2-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm3[0,2],ymm0[4,6],ymm3[4,6]
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: splatvar_funnnel_v8i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpunpckhdq {{.*#+}} ymm3 = ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[6],ymm0[6],ymm1[7],ymm0[7]
; AVX512F-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX512F-NEXT:    vpsrlq %xmm2, %ymm3, %ymm3
; AVX512F-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
; AVX512F-NEXT:    vpsrlq %xmm2, %ymm0, %ymm0
; AVX512F-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm3[0,2],ymm0[4,6],ymm3[4,6]
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: splatvar_funnnel_v8i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpunpckhdq {{.*#+}} ymm3 = ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[6],ymm0[6],ymm1[7],ymm0[7]
; AVX512VL-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX512VL-NEXT:    vpsrlq %xmm2, %ymm3, %ymm3
; AVX512VL-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
; AVX512VL-NEXT:    vpsrlq %xmm2, %ymm0, %ymm0
; AVX512VL-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm3[0,2],ymm0[4,6],ymm3[4,6]
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: splatvar_funnnel_v8i32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpunpckhdq {{.*#+}} ymm3 = ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[6],ymm0[6],ymm1[7],ymm0[7]
; AVX512BW-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX512BW-NEXT:    vpsrlq %xmm2, %ymm3, %ymm3
; AVX512BW-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
; AVX512BW-NEXT:    vpsrlq %xmm2, %ymm0, %ymm0
; AVX512BW-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm3[0,2],ymm0[4,6],ymm3[4,6]
; AVX512BW-NEXT:    retq
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v8i32:
; AVX512VBMI2:       # %bb.0:
; AVX512VBMI2-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512VBMI2-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512VBMI2-NEXT:    vpbroadcastd %xmm2, %ymm2
; AVX512VBMI2-NEXT:    vpshrdvd %zmm2, %zmm0, %zmm1
; AVX512VBMI2-NEXT:    vmovdqa %ymm1, %ymm0
; AVX512VBMI2-NEXT:    retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v8i32:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    vpunpckhdq {{.*#+}} ymm3 = ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[6],ymm0[6],ymm1[7],ymm0[7]
; AVX512VLBW-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX512VLBW-NEXT:    vpsrlq %xmm2, %ymm3, %ymm3
; AVX512VLBW-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
; AVX512VLBW-NEXT:    vpsrlq %xmm2, %ymm0, %ymm0
; AVX512VLBW-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm3[0,2],ymm0[4,6],ymm3[4,6]
; AVX512VLBW-NEXT:    retq
;
; AVX10-LABEL: splatvar_funnnel_v8i32:
; AVX10:       # %bb.0:
; AVX10-NEXT:    vpbroadcastd %xmm2, %ymm2
; AVX10-NEXT:    vpshrdvd %ymm2, %ymm0, %ymm1
; AVX10-NEXT:    vmovdqa %ymm1, %ymm0
; AVX10-NEXT:    retq
;
; XOPAVX1-LABEL: splatvar_funnnel_v8i32:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; XOPAVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
; XOPAVX1-NEXT:    vpunpckhdq {{.*#+}} xmm5 = xmm4[2],xmm3[2],xmm4[3],xmm3[3]
; XOPAVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; XOPAVX1-NEXT:    vpsrlq %xmm2, %xmm5, %xmm5
; XOPAVX1-NEXT:    vpunpckhdq {{.*#+}} xmm6 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; XOPAVX1-NEXT:    vpsrlq %xmm2, %xmm6, %xmm6
; XOPAVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm6, %ymm5
; XOPAVX1-NEXT:    vpunpckldq {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
; XOPAVX1-NEXT:    vpsrlq %xmm2, %xmm3, %xmm3
; XOPAVX1-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; XOPAVX1-NEXT:    vpsrlq %xmm2, %xmm0, %xmm0
; XOPAVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; XOPAVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm5[0,2],ymm0[4,6],ymm5[4,6]
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: splatvar_funnnel_v8i32:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vpunpckhdq {{.*#+}} ymm3 = ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[6],ymm0[6],ymm1[7],ymm0[7]
; XOPAVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; XOPAVX2-NEXT:    vpsrlq %xmm2, %ymm3, %ymm3
; XOPAVX2-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
; XOPAVX2-NEXT:    vpsrlq %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm3[0,2],ymm0[4,6],ymm3[4,6]
; XOPAVX2-NEXT:    retq
; Broadcast element 0 of %amt to all eight lanes. The all-zero mask never reads
; the second operand, so poison is used for it instead of the deprecated undef.
  %splat = shufflevector <8 x i32> %amt, <8 x i32> poison, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %splat)
  ret <8 x i32> %res
}

; Funnel shift right on <16 x i16> where every lane uses the same shift amount
; (element 0 of %amt, broadcast by the shufflevector in the function body).
define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %amt) nounwind {
; AVX1-LABEL: splatvar_funnnel_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovsxbq {{.*#+}} xmm3 = [15,0]
; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm4
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
; AVX1-NEXT:    vpsrlw %xmm4, %xmm5, %xmm5
; AVX1-NEXT:    vpandn %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpaddw %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpsllw %xmm2, %xmm3, %xmm3
; AVX1-NEXT:    vpor %xmm5, %xmm3, %xmm3
; AVX1-NEXT:    vpsrlw %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vpaddw %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    vpsllw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
;
AVX2-LABEL: splatvar_funnnel_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [15,0,0,0,15,0,0,0]
; AVX2-NEXT:    vpand %xmm3, %xmm2, %xmm4
; AVX2-NEXT:    vpsrlw %xmm4, %ymm1, %ymm1
; AVX2-NEXT:    vpandn %xmm3, %xmm2, %xmm2
; AVX2-NEXT:    vpaddw %ymm0, %ymm0, %ymm0
; AVX2-NEXT:    vpsllw %xmm2, %ymm0, %ymm0
; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: splatvar_funnnel_v16i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [15,0,0,0,15,0,0,0]
; AVX512F-NEXT:    vpand %xmm3, %xmm2, %xmm4
; AVX512F-NEXT:    vpsrlw %xmm4, %ymm1, %ymm1
; AVX512F-NEXT:    vpandn %xmm3, %xmm2, %xmm2
; AVX512F-NEXT:    vpaddw %ymm0, %ymm0, %ymm0
; AVX512F-NEXT:    vpsllw %xmm2, %ymm0, %ymm0
; AVX512F-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: splatvar_funnnel_v16i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [15,0,0,0,15,0,0,0]
; AVX512VL-NEXT:    vpand %xmm3, %xmm2, %xmm4
; AVX512VL-NEXT:    vpsrlw %xmm4, %ymm1, %ymm1
; AVX512VL-NEXT:    vpandn %xmm3, %xmm2, %xmm2
; AVX512VL-NEXT:    vpaddw %ymm0, %ymm0, %ymm0
; AVX512VL-NEXT:    vpsllw %xmm2, %ymm0, %ymm0
; AVX512VL-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: splatvar_funnnel_v16i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [15,0,0,0,15,0,0,0]
; AVX512BW-NEXT:    vpand %xmm3, %xmm2, %xmm4
; AVX512BW-NEXT:    vpsrlw %xmm4, %ymm1, %ymm1
; AVX512BW-NEXT:    vpandn %xmm3, %xmm2, %xmm2
; AVX512BW-NEXT:    vpaddw %ymm0, %ymm0, %ymm0
; AVX512BW-NEXT:    vpsllw %xmm2, %ymm0, %ymm0
; AVX512BW-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT:    retq
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v16i16:
; AVX512VBMI2:       # %bb.0:
; AVX512VBMI2-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512VBMI2-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512VBMI2-NEXT:    vpbroadcastw %xmm2, %ymm2
; AVX512VBMI2-NEXT:    vpshrdvw %zmm2, %zmm0, %zmm1
; AVX512VBMI2-NEXT:    vmovdqa %ymm1, %ymm0
; AVX512VBMI2-NEXT:    retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v16i16:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [15,0,0,0,15,0,0,0]
; AVX512VLBW-NEXT:    vpand %xmm3, %xmm2, %xmm4
; AVX512VLBW-NEXT:    vpsrlw %xmm4, %ymm1, %ymm1
; AVX512VLBW-NEXT:    vpandn %xmm3, %xmm2, %xmm2
; AVX512VLBW-NEXT:    vpaddw %ymm0, %ymm0, %ymm0
; AVX512VLBW-NEXT:    vpsllw %xmm2, %ymm0, %ymm0
; AVX512VLBW-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT:    retq
;
; AVX10-LABEL: splatvar_funnnel_v16i16:
; AVX10:       # %bb.0:
; AVX10-NEXT:    vpbroadcastw %xmm2, %ymm2
; AVX10-NEXT:    vpshrdvw %ymm2, %ymm0, %ymm1
; AVX10-NEXT:    vmovdqa %ymm1, %ymm0
; AVX10-NEXT:    retq
;
; XOPAVX1-LABEL: splatvar_funnnel_v16i16:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vpmovsxbq {{.*#+}} xmm3 = [15,0]
; XOPAVX1-NEXT:    vpand %xmm3, %xmm2, %xmm4
; XOPAVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
; XOPAVX1-NEXT:    vpsrlw %xmm4, %xmm5, %xmm5
; XOPAVX1-NEXT:    vpandn %xmm3, %xmm2, %xmm2
; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; XOPAVX1-NEXT:    vpaddw %xmm3, %xmm3, %xmm3
; XOPAVX1-NEXT:    vpsllw %xmm2, %xmm3, %xmm3
; XOPAVX1-NEXT:    vpor %xmm5, %xmm3, %xmm3
; XOPAVX1-NEXT:    vpsrlw %xmm4, %xmm1, %xmm1
; XOPAVX1-NEXT:    vpaddw %xmm0, %xmm0, %xmm0
; XOPAVX1-NEXT:    vpsllw %xmm2, %xmm0, %xmm0
; XOPAVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: splatvar_funnnel_v16i16:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [15,0,0,0,15,0,0,0]
; XOPAVX2-NEXT:    vpand %xmm3, %xmm2, %xmm4
; XOPAVX2-NEXT:    vpsrlw %xmm4, %ymm1, %ymm1
; XOPAVX2-NEXT:    vpandn %xmm3, %xmm2, %xmm2
; XOPAVX2-NEXT:    vpaddw %ymm0, %ymm0, %ymm0
; XOPAVX2-NEXT:    vpsllw %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT:    retq
; Broadcast element 0 of %amt to all sixteen lanes. The all-zero mask never reads
; the second operand, so poison is used for it instead of the deprecated undef.
  %splat = shufflevector <16 x i16> %amt, <16 x i16> poison, <16 x i32> zeroinitializer
  %res = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %splat)
  ret <16 x i16> %res
}

define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %amt) nounwind {
; AVX1-LABEL: splatvar_funnnel_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm5 = xmm4[8],xmm3[8],xmm4[9],xmm3[9],xmm4[10],xmm3[10],xmm4[11],xmm3[11],xmm4[12],xmm3[12],xmm4[13],xmm3[13],xmm4[14],xmm3[14],xmm4[15],xmm3[15]
; AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX1-NEXT:    vpsrlw %xmm2, %xmm5, %xmm5
; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm6 = [255,255,255,255,255,255,255,255]
; AVX1-NEXT:    vpand %xmm6, %xmm5, %xmm5
; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
; AVX1-NEXT:    vpsrlw %xmm2, %xmm3, %xmm3
; AVX1-NEXT:    vpand %xmm6, %xmm3, %xmm3
; AVX1-NEXT:    vpackuswb %xmm5, %xmm3, %xmm3
; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm4 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
; AVX1-NEXT:    vpsrlw %xmm2, %xmm4, %xmm4
; AVX1-NEXT:    vpand %xmm6, %xmm4, %xmm4
; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; AVX1-NEXT:    vpsrlw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm6, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL:
splatvar_funnnel_v32i8: 1178; AVX2: # %bb.0: 1179; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31] 1180; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 1181; AVX2-NEXT: vpsrlw %xmm2, %ymm3, %ymm3 1182; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 1183; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3 1184; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23] 1185; AVX2-NEXT: vpsrlw %xmm2, %ymm0, %ymm0 1186; AVX2-NEXT: vpand %ymm4, %ymm0, %ymm0 1187; AVX2-NEXT: vpackuswb %ymm3, %ymm0, %ymm0 1188; AVX2-NEXT: retq 1189; 1190; AVX512F-LABEL: splatvar_funnnel_v32i8: 1191; AVX512F: # %bb.0: 1192; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31] 1193; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 1194; AVX512F-NEXT: vpsrlw %xmm2, %ymm3, %ymm3 1195; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 1196; AVX512F-NEXT: vpand %ymm4, %ymm3, %ymm3 1197; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm0 = 
ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23] 1198; AVX512F-NEXT: vpsrlw %xmm2, %ymm0, %ymm0 1199; AVX512F-NEXT: vpand %ymm4, %ymm0, %ymm0 1200; AVX512F-NEXT: vpackuswb %ymm3, %ymm0, %ymm0 1201; AVX512F-NEXT: retq 1202; 1203; AVX512VL-LABEL: splatvar_funnnel_v32i8: 1204; AVX512VL: # %bb.0: 1205; AVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31] 1206; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 1207; AVX512VL-NEXT: vpsrlw %xmm2, %ymm3, %ymm3 1208; AVX512VL-NEXT: vpbroadcastd {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 1209; AVX512VL-NEXT: vpand %ymm4, %ymm3, %ymm3 1210; AVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23] 1211; AVX512VL-NEXT: vpsrlw %xmm2, %ymm0, %ymm0 1212; AVX512VL-NEXT: vpand %ymm4, %ymm0, %ymm0 1213; AVX512VL-NEXT: vpackuswb %ymm3, %ymm0, %ymm0 1214; AVX512VL-NEXT: retq 1215; 1216; AVX512BW-LABEL: splatvar_funnnel_v32i8: 1217; AVX512BW: # %bb.0: 1218; AVX512BW-NEXT: vpunpckhbw {{.*#+}} ymm3 = 
ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31] 1219; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 1220; AVX512BW-NEXT: vpsrlw %xmm2, %ymm3, %ymm3 1221; AVX512BW-NEXT: vpbroadcastw {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 1222; AVX512BW-NEXT: vpand %ymm4, %ymm3, %ymm3 1223; AVX512BW-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23] 1224; AVX512BW-NEXT: vpsrlw %xmm2, %ymm0, %ymm0 1225; AVX512BW-NEXT: vpand %ymm4, %ymm0, %ymm0 1226; AVX512BW-NEXT: vpackuswb %ymm3, %ymm0, %ymm0 1227; AVX512BW-NEXT: retq 1228; 1229; AVX512VBMI2-LABEL: splatvar_funnnel_v32i8: 1230; AVX512VBMI2: # %bb.0: 1231; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,2,4,6,8,10,12,14,64,66,68,70,72,74,76,78,16,18,20,22,24,26,28,30,80,82,84,86,88,90,92,94] 1232; AVX512VBMI2-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31] 1233; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 1234; AVX512VBMI2-NEXT: vpsrlw %xmm2, %ymm4, %ymm4 1235; AVX512VBMI2-NEXT: vpunpcklbw {{.*#+}} ymm0 = 
ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23] 1236; AVX512VBMI2-NEXT: vpsrlw %xmm2, %ymm0, %ymm0 1237; AVX512VBMI2-NEXT: vpermt2b %zmm4, %zmm3, %zmm0 1238; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1239; AVX512VBMI2-NEXT: retq 1240; 1241; AVX512VLBW-LABEL: splatvar_funnnel_v32i8: 1242; AVX512VLBW: # %bb.0: 1243; AVX512VLBW-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31] 1244; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 1245; AVX512VLBW-NEXT: vpsrlw %xmm2, %ymm3, %ymm3 1246; AVX512VLBW-NEXT: vpbroadcastw {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 1247; AVX512VLBW-NEXT: vpand %ymm4, %ymm3, %ymm3 1248; AVX512VLBW-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23] 1249; AVX512VLBW-NEXT: vpsrlw %xmm2, %ymm0, %ymm0 1250; AVX512VLBW-NEXT: vpand %ymm4, %ymm0, %ymm0 1251; AVX512VLBW-NEXT: vpackuswb %ymm3, %ymm0, %ymm0 1252; AVX512VLBW-NEXT: retq 1253; 1254; AVX10-LABEL: splatvar_funnnel_v32i8: 1255; AVX10: # %bb.0: 1256; AVX10-NEXT: vpunpckhbw {{.*#+}} ymm3 = 
ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31] 1257; AVX10-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 1258; AVX10-NEXT: vpsrlw %xmm2, %ymm3, %ymm3 1259; AVX10-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23] 1260; AVX10-NEXT: vpsrlw %xmm2, %ymm0, %ymm1 1261; AVX10-NEXT: vmovdqa {{.*#+}} ymm0 = [0,2,4,6,8,10,12,14,32,34,36,38,40,42,44,46,16,18,20,22,24,26,28,30,48,50,52,54,56,58,60,62] 1262; AVX10-NEXT: vpermi2b %ymm3, %ymm1, %ymm0 1263; AVX10-NEXT: retq 1264; 1265; XOPAVX1-LABEL: splatvar_funnnel_v32i8: 1266; XOPAVX1: # %bb.0: 1267; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1268; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm4 1269; XOPAVX1-NEXT: vpunpckhbw {{.*#+}} xmm5 = xmm4[8],xmm3[8],xmm4[9],xmm3[9],xmm4[10],xmm3[10],xmm4[11],xmm3[11],xmm4[12],xmm3[12],xmm4[13],xmm3[13],xmm4[14],xmm3[14],xmm4[15],xmm3[15] 1270; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 1271; XOPAVX1-NEXT: vpsrlw %xmm2, %xmm5, %xmm5 1272; XOPAVX1-NEXT: vpunpcklbw {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] 1273; XOPAVX1-NEXT: vpsrlw %xmm2, %xmm3, %xmm3 1274; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30] 1275; XOPAVX1-NEXT: vpperm %xmm4, %xmm5, %xmm3, %xmm3 1276; XOPAVX1-NEXT: vpunpckhbw {{.*#+}} xmm5 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] 
1277; XOPAVX1-NEXT: vpsrlw %xmm2, %xmm5, %xmm5 1278; XOPAVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 1279; XOPAVX1-NEXT: vpsrlw %xmm2, %xmm0, %xmm0 1280; XOPAVX1-NEXT: vpperm %xmm4, %xmm5, %xmm0, %xmm0 1281; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 1282; XOPAVX1-NEXT: retq 1283; 1284; XOPAVX2-LABEL: splatvar_funnnel_v32i8: 1285; XOPAVX2: # %bb.0: 1286; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm3 1287; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm4 1288; XOPAVX2-NEXT: vpunpckhbw {{.*#+}} xmm5 = xmm4[8],xmm3[8],xmm4[9],xmm3[9],xmm4[10],xmm3[10],xmm4[11],xmm3[11],xmm4[12],xmm3[12],xmm4[13],xmm3[13],xmm4[14],xmm3[14],xmm4[15],xmm3[15] 1289; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 1290; XOPAVX2-NEXT: vpsrlw %xmm2, %xmm5, %xmm5 1291; XOPAVX2-NEXT: vpunpcklbw {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] 1292; XOPAVX2-NEXT: vpsrlw %xmm2, %xmm3, %xmm3 1293; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm4 = [0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30] 1294; XOPAVX2-NEXT: vpperm %xmm4, %xmm5, %xmm3, %xmm3 1295; XOPAVX2-NEXT: vpunpckhbw {{.*#+}} xmm5 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] 1296; XOPAVX2-NEXT: vpsrlw %xmm2, %xmm5, %xmm5 1297; XOPAVX2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 1298; XOPAVX2-NEXT: vpsrlw %xmm2, %xmm0, %xmm0 1299; XOPAVX2-NEXT: vpperm %xmm4, %xmm5, %xmm0, %xmm0 1300; XOPAVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 1301; XOPAVX2-NEXT: retq 1302 %splat = shufflevector <32 x i8> %amt, <32 x i8> undef, <32 x i32> zeroinitializer 1303 %res = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %x, <32 x i8> %y, 
<32 x i8> %splat) 1304 ret <32 x i8> %res 1305} 1306 1307; 1308; Constant Shifts 1309; 1310 1311define <4 x i64> @constant_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y) nounwind { 1312; AVX1-LABEL: constant_funnnel_v4i64: 1313; AVX1: # %bb.0: 1314; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1315; AVX1-NEXT: vpsrlq $60, %xmm2, %xmm3 1316; AVX1-NEXT: vpsrlq $50, %xmm2, %xmm2 1317; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm3[4,5,6,7] 1318; AVX1-NEXT: vpsrlq $14, %xmm1, %xmm3 1319; AVX1-NEXT: vpsrlq $4, %xmm1, %xmm1 1320; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7] 1321; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 1322; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1323; AVX1-NEXT: vpsllq $4, %xmm2, %xmm3 1324; AVX1-NEXT: vpsllq $14, %xmm2, %xmm2 1325; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm3[4,5,6,7] 1326; AVX1-NEXT: vpsllq $50, %xmm0, %xmm3 1327; AVX1-NEXT: vpsllq $60, %xmm0, %xmm0 1328; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7] 1329; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1330; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 1331; AVX1-NEXT: retq 1332; 1333; AVX2-LABEL: constant_funnnel_v4i64: 1334; AVX2: # %bb.0: 1335; AVX2-NEXT: vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 1336; AVX2-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1337; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 1338; AVX2-NEXT: retq 1339; 1340; AVX512F-LABEL: constant_funnnel_v4i64: 1341; AVX512F: # %bb.0: 1342; AVX512F-NEXT: vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 1343; AVX512F-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1344; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0 1345; AVX512F-NEXT: retq 1346; 1347; AVX512VL-LABEL: constant_funnnel_v4i64: 1348; AVX512VL: # %bb.0: 1349; AVX512VL-NEXT: vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 1350; AVX512VL-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1351; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0 1352; AVX512VL-NEXT: retq 1353; 1354; 
AVX512BW-LABEL: constant_funnnel_v4i64: 1355; AVX512BW: # %bb.0: 1356; AVX512BW-NEXT: vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 1357; AVX512BW-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1358; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0 1359; AVX512BW-NEXT: retq 1360; 1361; AVX512VBMI2-LABEL: constant_funnnel_v4i64: 1362; AVX512VBMI2: # %bb.0: 1363; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 1364; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1365; AVX512VBMI2-NEXT: vpmovsxbq {{.*#+}} ymm2 = [4,14,50,60] 1366; AVX512VBMI2-NEXT: vpshrdvq %zmm2, %zmm0, %zmm1 1367; AVX512VBMI2-NEXT: vmovdqa %ymm1, %ymm0 1368; AVX512VBMI2-NEXT: retq 1369; 1370; AVX512VLBW-LABEL: constant_funnnel_v4i64: 1371; AVX512VLBW: # %bb.0: 1372; AVX512VLBW-NEXT: vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 1373; AVX512VLBW-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1374; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 1375; AVX512VLBW-NEXT: retq 1376; 1377; AVX10-LABEL: constant_funnnel_v4i64: 1378; AVX10: # %bb.0: 1379; AVX10-NEXT: vpshrdvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1 1380; AVX10-NEXT: vmovdqa %ymm1, %ymm0 1381; AVX10-NEXT: retq 1382; 1383; XOPAVX1-LABEL: constant_funnnel_v4i64: 1384; XOPAVX1: # %bb.0: 1385; XOPAVX1-NEXT: vpshlq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2 1386; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 1387; XOPAVX1-NEXT: vpshlq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 1388; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 1389; XOPAVX1-NEXT: vpshlq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 1390; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1391; XOPAVX1-NEXT: vpshlq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1392; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 1393; XOPAVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 1394; XOPAVX1-NEXT: retq 1395; 1396; XOPAVX2-LABEL: constant_funnnel_v4i64: 1397; XOPAVX2: # %bb.0: 1398; XOPAVX2-NEXT: vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 
1399; XOPAVX2-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1400; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 1401; XOPAVX2-NEXT: retq 1402 %res = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> <i64 4, i64 14, i64 50, i64 60>) 1403 ret <4 x i64> %res 1404} 1405 1406define <8 x i32> @constant_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y) nounwind { 1407; AVX1-LABEL: constant_funnnel_v8i32: 1408; AVX1: # %bb.0: 1409; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1410; AVX1-NEXT: vpsrld $11, %xmm2, %xmm3 1411; AVX1-NEXT: vpsrld $9, %xmm2, %xmm4 1412; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7] 1413; AVX1-NEXT: vpsrld $10, %xmm2, %xmm4 1414; AVX1-NEXT: vpsrld $8, %xmm2, %xmm2 1415; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm4[4,5,6,7] 1416; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7] 1417; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1418; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 1419; AVX1-NEXT: vpor %xmm2, %xmm3, %xmm2 1420; AVX1-NEXT: vpsrld $7, %xmm1, %xmm3 1421; AVX1-NEXT: vpsrld $5, %xmm1, %xmm4 1422; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7] 1423; AVX1-NEXT: vpsrld $6, %xmm1, %xmm4 1424; AVX1-NEXT: vpsrld $4, %xmm1, %xmm1 1425; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm4[4,5,6,7] 1426; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7] 1427; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1428; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 1429; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1430; AVX1-NEXT: retq 1431; 1432; AVX2-LABEL: constant_funnnel_v8i32: 1433; AVX2: # %bb.0: 1434; AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 1435; AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1436; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 1437; AVX2-NEXT: retq 1438; 1439; AVX512F-LABEL: constant_funnnel_v8i32: 1440; AVX512F: # %bb.0: 1441; AVX512F-NEXT: vpsrlvd 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 1442; AVX512F-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1443; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0 1444; AVX512F-NEXT: retq 1445; 1446; AVX512VL-LABEL: constant_funnnel_v8i32: 1447; AVX512VL: # %bb.0: 1448; AVX512VL-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 1449; AVX512VL-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1450; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0 1451; AVX512VL-NEXT: retq 1452; 1453; AVX512BW-LABEL: constant_funnnel_v8i32: 1454; AVX512BW: # %bb.0: 1455; AVX512BW-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 1456; AVX512BW-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1457; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0 1458; AVX512BW-NEXT: retq 1459; 1460; AVX512VBMI2-LABEL: constant_funnnel_v8i32: 1461; AVX512VBMI2: # %bb.0: 1462; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 1463; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1464; AVX512VBMI2-NEXT: vpmovsxbd {{.*#+}} ymm2 = [4,5,6,7,8,9,10,11] 1465; AVX512VBMI2-NEXT: vpshrdvd %zmm2, %zmm0, %zmm1 1466; AVX512VBMI2-NEXT: vmovdqa %ymm1, %ymm0 1467; AVX512VBMI2-NEXT: retq 1468; 1469; AVX512VLBW-LABEL: constant_funnnel_v8i32: 1470; AVX512VLBW: # %bb.0: 1471; AVX512VLBW-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 1472; AVX512VLBW-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1473; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 1474; AVX512VLBW-NEXT: retq 1475; 1476; AVX10-LABEL: constant_funnnel_v8i32: 1477; AVX10: # %bb.0: 1478; AVX10-NEXT: vpshrdvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1 1479; AVX10-NEXT: vmovdqa %ymm1, %ymm0 1480; AVX10-NEXT: retq 1481; 1482; XOPAVX1-LABEL: constant_funnnel_v8i32: 1483; XOPAVX1: # %bb.0: 1484; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1485; XOPAVX1-NEXT: vpshld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 1486; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1487; XOPAVX1-NEXT: vpshld 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 1488; XOPAVX1-NEXT: vpor %xmm2, %xmm3, %xmm2 1489; XOPAVX1-NEXT: vpshld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 1490; XOPAVX1-NEXT: vpshld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1491; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 1492; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1493; XOPAVX1-NEXT: retq 1494; 1495; XOPAVX2-LABEL: constant_funnnel_v8i32: 1496; XOPAVX2: # %bb.0: 1497; XOPAVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 1498; XOPAVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1499; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 1500; XOPAVX2-NEXT: retq 1501 %res = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>) 1502 ret <8 x i32> %res 1503} 1504 1505define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwind { 1506; AVX1-LABEL: constant_funnnel_v16i16: 1507; AVX1: # %bb.0: 1508; AVX1-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2 # [u,32768,16384,8192,4096,2048,1024,512] 1509; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3,4,5,6,7] 1510; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 1511; AVX1-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [256,128,64,32,16,8,4,2] 1512; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 1513; AVX1-NEXT: vpaddw %xmm0, %xmm0, %xmm2 1514; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [32768,16384,8192,4096,2048,1024,512,256] 1515; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1516; AVX1-NEXT: vpaddw %xmm0, %xmm0, %xmm0 1517; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [128,64,32,16,8,4,2,1] 1518; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 1519; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 1520; AVX1-NEXT: retq 1521; 1522; AVX2-LABEL: constant_funnnel_v16i16: 1523; AVX2: # %bb.0: 1524; AVX2-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm2 # 
[u,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2] 1525; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7] 1526; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7] 1527; AVX2-NEXT: vpaddw %ymm0, %ymm0, %ymm0 1528; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2,1] 1529; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 1530; AVX2-NEXT: retq 1531; 1532; AVX512F-LABEL: constant_funnnel_v16i16: 1533; AVX512F: # %bb.0: 1534; AVX512F-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm2 # [u,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2] 1535; AVX512F-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7] 1536; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7] 1537; AVX512F-NEXT: vpaddw %ymm0, %ymm0, %ymm0 1538; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2,1] 1539; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0 1540; AVX512F-NEXT: retq 1541; 1542; AVX512VL-LABEL: constant_funnnel_v16i16: 1543; AVX512VL: # %bb.0: 1544; AVX512VL-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm2 # [u,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2] 1545; AVX512VL-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7] 1546; AVX512VL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7] 1547; AVX512VL-NEXT: vpaddw %ymm0, %ymm0, %ymm0 1548; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2,1] 1549; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0 1550; AVX512VL-NEXT: retq 1551; 1552; AVX512BW-LABEL: constant_funnnel_v16i16: 1553; AVX512BW: # %bb.0: 1554; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 1555; AVX512BW-NEXT: vpmovsxbw {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] 1556; AVX512BW-NEXT: vpsrlvw %zmm2, %zmm1, %zmm1 1557; AVX512BW-NEXT: vpmovsxbw 
{{.*#+}} ymm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0] 1558; AVX512BW-NEXT: vpaddw %ymm0, %ymm0, %ymm0 1559; AVX512BW-NEXT: vpsllvw %zmm2, %zmm0, %zmm0 1560; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0 1561; AVX512BW-NEXT: retq 1562; 1563; AVX512VBMI2-LABEL: constant_funnnel_v16i16: 1564; AVX512VBMI2: # %bb.0: 1565; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 1566; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1567; AVX512VBMI2-NEXT: vpmovsxbw {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] 1568; AVX512VBMI2-NEXT: vpshrdvw %zmm2, %zmm0, %zmm1 1569; AVX512VBMI2-NEXT: vmovdqa %ymm1, %ymm0 1570; AVX512VBMI2-NEXT: retq 1571; 1572; AVX512VLBW-LABEL: constant_funnnel_v16i16: 1573; AVX512VLBW: # %bb.0: 1574; AVX512VLBW-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 1575; AVX512VLBW-NEXT: vpaddw %ymm0, %ymm0, %ymm0 1576; AVX512VLBW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1577; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 1578; AVX512VLBW-NEXT: retq 1579; 1580; AVX10-LABEL: constant_funnnel_v16i16: 1581; AVX10: # %bb.0: 1582; AVX10-NEXT: vpshrdvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1 1583; AVX10-NEXT: vmovdqa %ymm1, %ymm0 1584; AVX10-NEXT: retq 1585; 1586; XOPAVX1-LABEL: constant_funnnel_v16i16: 1587; XOPAVX1: # %bb.0: 1588; XOPAVX1-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2 1589; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 1590; XOPAVX1-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 1591; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 1592; XOPAVX1-NEXT: vpaddw %xmm0, %xmm0, %xmm2 1593; XOPAVX1-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 1594; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1595; XOPAVX1-NEXT: vpaddw %xmm0, %xmm0, %xmm0 1596; XOPAVX1-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1597; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 1598; XOPAVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 1599; XOPAVX1-NEXT: retq 1600; 1601; XOPAVX2-LABEL: 
constant_funnnel_v16i16: 1602; XOPAVX2: # %bb.0: 1603; XOPAVX2-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm2 # [u,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2] 1604; XOPAVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7] 1605; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7] 1606; XOPAVX2-NEXT: vpaddw %ymm0, %ymm0, %ymm0 1607; XOPAVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2,1] 1608; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 1609; XOPAVX2-NEXT: retq 1610 %res = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>) 1611 ret <16 x i16> %res 1612} 1613 1614define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwind { 1615; AVX1-LABEL: constant_funnnel_v32i8: 1616; AVX1: # %bb.0: 1617; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1618; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 1619; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm4 = xmm2[8],xmm3[8],xmm2[9],xmm3[9],xmm2[10],xmm3[10],xmm2[11],xmm3[11],xmm2[12],xmm3[12],xmm2[13],xmm3[13],xmm2[14],xmm3[14],xmm2[15],xmm3[15] 1620; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [256,2,4,8,16,32,64,128] 1621; AVX1-NEXT: vpmullw %xmm5, %xmm4, %xmm4 1622; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4 1623; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero 1624; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [256,128,64,32,16,8,4,2] 1625; AVX1-NEXT: vpmullw %xmm6, %xmm2, %xmm2 1626; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2 1627; AVX1-NEXT: vpackuswb %xmm4, %xmm2, %xmm2 1628; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 1629; AVX1-NEXT: vpaddb %xmm4, %xmm4, %xmm4 1630; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm7 = [128,32,8,2,128,2,8,32] 1631; AVX1-NEXT: vpmaddubsw %xmm7, %xmm4, %xmm8 1632; AVX1-NEXT: vbroadcastss {{.*#+}} 
xmm9 = [255,255,255,255,255,255,255,255] 1633; AVX1-NEXT: vpand %xmm9, %xmm8, %xmm8 1634; AVX1-NEXT: vmovdqa {{.*#+}} xmm10 = [0,64,0,16,0,4,0,1,0,1,0,4,0,16,0,64] 1635; AVX1-NEXT: vpmaddubsw %xmm10, %xmm4, %xmm4 1636; AVX1-NEXT: vpsllw $8, %xmm4, %xmm4 1637; AVX1-NEXT: vpor %xmm4, %xmm8, %xmm4 1638; AVX1-NEXT: vpor %xmm2, %xmm4, %xmm2 1639; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm1[8],xmm3[8],xmm1[9],xmm3[9],xmm1[10],xmm3[10],xmm1[11],xmm3[11],xmm1[12],xmm3[12],xmm1[13],xmm3[13],xmm1[14],xmm3[14],xmm1[15],xmm3[15] 1640; AVX1-NEXT: vpmullw %xmm5, %xmm3, %xmm3 1641; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3 1642; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero 1643; AVX1-NEXT: vpmullw %xmm6, %xmm1, %xmm1 1644; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1 1645; AVX1-NEXT: vpackuswb %xmm3, %xmm1, %xmm1 1646; AVX1-NEXT: vpaddb %xmm0, %xmm0, %xmm0 1647; AVX1-NEXT: vpmaddubsw %xmm7, %xmm0, %xmm3 1648; AVX1-NEXT: vpand %xmm3, %xmm9, %xmm3 1649; AVX1-NEXT: vpmaddubsw %xmm10, %xmm0, %xmm0 1650; AVX1-NEXT: vpsllw $8, %xmm0, %xmm0 1651; AVX1-NEXT: vpor %xmm0, %xmm3, %xmm0 1652; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 1653; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1654; AVX1-NEXT: retq 1655; 1656; AVX2-LABEL: constant_funnnel_v32i8: 1657; AVX2: # %bb.0: 1658; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 1659; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15],ymm1[24],ymm2[24],ymm1[25],ymm2[25],ymm1[26],ymm2[26],ymm1[27],ymm2[27],ymm1[28],ymm2[28],ymm1[29],ymm2[29],ymm1[30],ymm2[30],ymm1[31],ymm2[31] 1660; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [256,2,4,8,16,32,64,128,256,2,4,8,16,32,64,128] 1661; AVX2-NEXT: vpsrlw $8, %ymm3, %ymm3 1662; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm1 = 
ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[16],ymm2[16],ymm1[17],ymm2[17],ymm1[18],ymm2[18],ymm1[19],ymm2[19],ymm1[20],ymm2[20],ymm1[21],ymm2[21],ymm1[22],ymm2[22],ymm1[23],ymm2[23] 1663; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [256,128,64,32,16,8,4,2,256,128,64,32,16,8,4,2] 1664; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1 1665; AVX2-NEXT: vpackuswb %ymm3, %ymm1, %ymm1 1666; AVX2-NEXT: vpaddb %ymm0, %ymm0, %ymm0 1667; AVX2-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2 # [0,64,0,16,0,4,0,1,0,1,0,4,0,16,0,64,0,64,0,16,0,4,0,1,0,1,0,4,0,16,0,64] 1668; AVX2-NEXT: vpsllw $8, %ymm2, %ymm2 1669; AVX2-NEXT: vpor %ymm1, %ymm2, %ymm1 1670; AVX2-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [128,0,32,0,8,0,2,0,128,0,2,0,8,0,32,0,128,0,32,0,8,0,2,0,128,0,2,0,8,0,32,0] 1671; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1672; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 1673; AVX2-NEXT: retq 1674; 1675; AVX512F-LABEL: constant_funnnel_v32i8: 1676; AVX512F: # %bb.0: 1677; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 1678; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15],ymm1[24],ymm2[24],ymm1[25],ymm2[25],ymm1[26],ymm2[26],ymm1[27],ymm2[27],ymm1[28],ymm2[28],ymm1[29],ymm2[29],ymm1[30],ymm2[30],ymm1[31],ymm2[31] 1679; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [256,2,4,8,16,32,64,128,256,2,4,8,16,32,64,128] 1680; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3 1681; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[16],ymm2[16],ymm1[17],ymm2[17],ymm1[18],ymm2[18],ymm1[19],ymm2[19],ymm1[20],ymm2[20],ymm1[21],ymm2[21],ymm1[22],ymm2[22],ymm1[23],ymm2[23] 1682; AVX512F-NEXT: 
vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [256,128,64,32,16,8,4,2,256,128,64,32,16,8,4,2] 1683; AVX512F-NEXT: vpsrlw $8, %ymm1, %ymm1 1684; AVX512F-NEXT: vpackuswb %ymm3, %ymm1, %ymm1 1685; AVX512F-NEXT: vpaddb %ymm0, %ymm0, %ymm0 1686; AVX512F-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2 # [0,64,0,16,0,4,0,1,0,1,0,4,0,16,0,64,0,64,0,16,0,4,0,1,0,1,0,4,0,16,0,64] 1687; AVX512F-NEXT: vpsllw $8, %ymm2, %ymm2 1688; AVX512F-NEXT: vpor %ymm1, %ymm2, %ymm1 1689; AVX512F-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [128,0,32,0,8,0,2,0,128,0,2,0,8,0,32,0,128,0,32,0,8,0,2,0,128,0,2,0,8,0,32,0] 1690; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1691; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0 1692; AVX512F-NEXT: retq 1693; 1694; AVX512VL-LABEL: constant_funnnel_v32i8: 1695; AVX512VL: # %bb.0: 1696; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2 1697; AVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15],ymm1[24],ymm2[24],ymm1[25],ymm2[25],ymm1[26],ymm2[26],ymm1[27],ymm2[27],ymm1[28],ymm2[28],ymm1[29],ymm2[29],ymm1[30],ymm2[30],ymm1[31],ymm2[31] 1698; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [256,2,4,8,16,32,64,128,256,2,4,8,16,32,64,128] 1699; AVX512VL-NEXT: vpsrlw $8, %ymm3, %ymm3 1700; AVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[16],ymm2[16],ymm1[17],ymm2[17],ymm1[18],ymm2[18],ymm1[19],ymm2[19],ymm1[20],ymm2[20],ymm1[21],ymm2[21],ymm1[22],ymm2[22],ymm1[23],ymm2[23] 1701; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [256,128,64,32,16,8,4,2,256,128,64,32,16,8,4,2] 1702; AVX512VL-NEXT: vpsrlw $8, %ymm1, %ymm1 1703; AVX512VL-NEXT: vpackuswb %ymm3, %ymm1, %ymm1 1704; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm0 
1705; AVX512VL-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2 # [0,64,0,16,0,4,0,1,0,1,0,4,0,16,0,64,0,64,0,16,0,4,0,1,0,1,0,4,0,16,0,64] 1706; AVX512VL-NEXT: vpsllw $8, %ymm2, %ymm2 1707; AVX512VL-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [128,0,32,0,8,0,2,0,128,0,2,0,8,0,32,0,128,0,32,0,8,0,2,0,128,0,2,0,8,0,32,0] 1708; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0 1709; AVX512VL-NEXT: vpternlogq {{.*#+}} ymm0 = ymm0 | ymm1 | ymm2 1710; AVX512VL-NEXT: retq 1711; 1712; AVX512BW-LABEL: constant_funnnel_v32i8: 1713; AVX512BW: # %bb.0: 1714; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero 1715; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 1716; AVX512BW-NEXT: vpsllw $8, %zmm0, %zmm0 1717; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 1718; AVX512BW-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 1719; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 1720; AVX512BW-NEXT: retq 1721; 1722; AVX512VBMI2-LABEL: constant_funnnel_v32i8: 1723; AVX512VBMI2: # %bb.0: 1724; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 1725; AVX512VBMI2-NEXT: # kill: def $ymm0 
killed $ymm0 def $zmm0 1726; AVX512VBMI2-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,64,1,65,2,66,3,67,4,68,5,69,6,70,7,71,8,72,9,73,10,74,11,75,12,76,13,77,14,78,15,79,16,80,17,81,18,82,19,83,20,84,21,85,22,86,23,87,24,88,25,89,26,90,27,91,28,92,29,93,30,94,31,95] 1727; AVX512VBMI2-NEXT: vpermi2b %zmm0, %zmm1, %zmm2 1728; AVX512VBMI2-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0 1729; AVX512VBMI2-NEXT: vpmovwb %zmm0, %ymm0 1730; AVX512VBMI2-NEXT: retq 1731; 1732; AVX512VLBW-LABEL: constant_funnnel_v32i8: 1733; AVX512VLBW: # %bb.0: 1734; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero 1735; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 1736; AVX512VLBW-NEXT: vpsllw $8, %zmm0, %zmm0 1737; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0 1738; AVX512VLBW-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 1739; AVX512VLBW-NEXT: vpmovwb %zmm0, %ymm0 1740; AVX512VLBW-NEXT: retq 1741; 1742; AVX512VLVBMI2-LABEL: constant_funnnel_v32i8: 1743; AVX512VLVBMI2: # %bb.0: 1744; AVX512VLVBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 1745; AVX512VLVBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1746; 
AVX512VLVBMI2-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,64,1,65,2,66,3,67,4,68,5,69,6,70,7,71,8,72,9,73,10,74,11,75,12,76,13,77,14,78,15,79,16,80,17,81,18,82,19,83,20,84,21,85,22,86,23,87,24,88,25,89,26,90,27,91,28,92,29,93,30,94,31,95] 1747; AVX512VLVBMI2-NEXT: vpermi2b %zmm0, %zmm1, %zmm2 1748; AVX512VLVBMI2-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0 1749; AVX512VLVBMI2-NEXT: vpmovwb %zmm0, %ymm0 1750; AVX512VLVBMI2-NEXT: retq 1751; 1752; AVX10_256-LABEL: constant_funnnel_v32i8: 1753; AVX10_256: # %bb.0: 1754; AVX10_256-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31] 1755; AVX10_256-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 1756; AVX10_256-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23] 1757; AVX10_256-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1 1758; AVX10_256-NEXT: vmovdqa {{.*#+}} ymm0 = [0,2,4,6,8,10,12,14,32,34,36,38,40,42,44,46,16,18,20,22,24,26,28,30,48,50,52,54,56,58,60,62] 1759; AVX10_256-NEXT: vpermi2b %ymm2, %ymm1, %ymm0 1760; AVX10_256-NEXT: retq 1761; 1762; XOPAVX1-LABEL: constant_funnnel_v32i8: 1763; XOPAVX1: # %bb.0: 1764; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1765; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,255,254,253,252,251,250,249,0,249,250,251,252,253,254,255] 1766; XOPAVX1-NEXT: vpshlb %xmm3, %xmm2, %xmm2 1767; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 1768; XOPAVX1-NEXT: vpaddb %xmm4, %xmm4, %xmm4 1769; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [7,6,5,4,3,2,1,0,7,0,1,2,3,4,5,6] 
; XOPAVX1-NEXT: vpshlb %xmm5, %xmm4, %xmm4
; XOPAVX1-NEXT: vpor %xmm2, %xmm4, %xmm2
; XOPAVX1-NEXT: vpshlb %xmm3, %xmm1, %xmm1
; XOPAVX1-NEXT: vpaddb %xmm0, %xmm0, %xmm0
; XOPAVX1-NEXT: vpshlb %xmm5, %xmm0, %xmm0
; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: constant_funnnel_v32i8:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,255,254,253,252,251,250,249,0,249,250,251,252,253,254,255]
; XOPAVX2-NEXT: vpshlb %xmm3, %xmm2, %xmm2
; XOPAVX2-NEXT: vpshlb %xmm3, %xmm1, %xmm1
; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; XOPAVX2-NEXT: vpaddb %xmm2, %xmm2, %xmm2
; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [7,6,5,4,3,2,1,0,7,0,1,2,3,4,5,6]
; XOPAVX2-NEXT: vpshlb %xmm3, %xmm2, %xmm2
; XOPAVX2-NEXT: vpaddb %xmm0, %xmm0, %xmm0
; XOPAVX2-NEXT: vpshlb %xmm3, %xmm0, %xmm0
; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
  %res = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1>)
  ret <32 x i8> %res
}

;
; Uniform Constant Shifts
;
; Every test in this group calls llvm.fshr with a constant shift-amount vector
; whose lanes all hold the same value. The assertion comments inside each
; function are autogenerated (see the note on the first line of this file), so
; regenerate them with utils/update_llc_test_checks.py instead of editing them
; by hand.

; 256-bit funnel shift right of 64-bit lanes by a splat amount of 14.
; Recorded lowering without a funnel-shift instruction - ymm1 is shifted right
; by 14, ymm0 is shifted left by 50 (64 - 14), and the halves are OR'ed; the
; 128-bit-only configurations do this per xmm half. The VBMI2-capable
; configurations emit a single vpshrdq $14 instead.
; NOTE(review) - ymm0/ymm1 presumably carry %x/%y per the calling convention; confirm.
define <4 x i64> @splatconstant_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y) nounwind {
; AVX1-LABEL: splatconstant_funnnel_v4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsrlq $14, %xmm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT: vpsrlq $14, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: vpsllq $50, %xmm0, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsllq $50, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatconstant_funnnel_v4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsrlq $14, %ymm1, %ymm1
; AVX2-NEXT: vpsllq $50, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: splatconstant_funnnel_v4i64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsrlq $14, %ymm1, %ymm1
; AVX512F-NEXT: vpsllq $50, %ymm0, %ymm0
; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_funnnel_v4i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrlq $14, %ymm1, %ymm1
; AVX512VL-NEXT: vpsllq $50, %ymm0, %ymm0
; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v4i64:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsrlq $14, %ymm1, %ymm1
; AVX512BW-NEXT: vpsllq $50, %ymm0, %ymm0
; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v4i64:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512VBMI2-NEXT: vpshrdq $14, %zmm0, %zmm1, %zmm0
; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v4i64:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsrlq $14, %ymm1, %ymm1
; AVX512VLBW-NEXT: vpsllq $50, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
; AVX10-LABEL: splatconstant_funnnel_v4i64:
; AVX10: # %bb.0:
; AVX10-NEXT: vpshrdq $14, %ymm0, %ymm1, %ymm0
; AVX10-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_funnnel_v4i64:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpsrlq $14, %xmm1, %xmm2
; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; XOPAVX1-NEXT: vpsrlq $14, %xmm1, %xmm1
; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; XOPAVX1-NEXT: vpsllq $50, %xmm0, %xmm2
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; XOPAVX1-NEXT: vpsllq $50, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; XOPAVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: splatconstant_funnnel_v4i64:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpsrlq $14, %ymm1, %ymm1
; XOPAVX2-NEXT: vpsllq $50, %ymm0, %ymm0
; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
  %res = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> <i64 14, i64 14, i64 14, i64 14>)
  ret <4 x i64> %res
}

; 256-bit funnel shift right of 32-bit lanes by a splat amount of 4.
; Recorded lowering without a funnel-shift instruction - ymm1 shifted right by
; 4, ymm0 shifted left by 28 (32 - 4), then OR'ed; the VBMI2-capable
; configurations emit a single vpshrdd $4.
define <8 x i32> @splatconstant_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y) nounwind {
; AVX1-LABEL: splatconstant_funnnel_v8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsrld $4, %xmm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT: vpsrld $4, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: vpslld $28, %xmm0, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpslld $28, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatconstant_funnnel_v8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsrld $4, %ymm1, %ymm1
; AVX2-NEXT: vpslld $28, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: splatconstant_funnnel_v8i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsrld $4, %ymm1, %ymm1
; AVX512F-NEXT: vpslld $28, %ymm0, %ymm0
; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_funnnel_v8i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrld $4, %ymm1, %ymm1
; AVX512VL-NEXT: vpslld $28, %ymm0, %ymm0
; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v8i32:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsrld $4, %ymm1, %ymm1
; AVX512BW-NEXT: vpslld $28, %ymm0, %ymm0
; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v8i32:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512VBMI2-NEXT: vpshrdd $4, %zmm0, %zmm1, %zmm0
; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v8i32:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsrld $4, %ymm1, %ymm1
; AVX512VLBW-NEXT: vpslld $28, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
; AVX10-LABEL: splatconstant_funnnel_v8i32:
; AVX10: # %bb.0:
; AVX10-NEXT: vpshrdd $4, %ymm0, %ymm1, %ymm0
; AVX10-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_funnnel_v8i32:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpsrld $4, %xmm1, %xmm2
; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; XOPAVX1-NEXT: vpsrld $4, %xmm1, %xmm1
; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; XOPAVX1-NEXT: vpslld $28, %xmm0, %xmm2
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; XOPAVX1-NEXT: vpslld $28, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; XOPAVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: splatconstant_funnnel_v8i32:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpsrld $4, %ymm1, %ymm1
; XOPAVX2-NEXT: vpslld $28, %ymm0, %ymm0
; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
  %res = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>)
  ret <8 x i32> %res
}

; 256-bit funnel shift right of 16-bit lanes by a splat amount of 7.
; Recorded lowering without a funnel-shift instruction - ymm1 shifted right by
; 7, ymm0 shifted left by 9 (16 - 7), then OR'ed; the VBMI2-capable
; configurations emit a single vpshrdw $7.
define <16 x i16> @splatconstant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwind {
; AVX1-LABEL: splatconstant_funnnel_v16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsrlw $7, %xmm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT: vpsrlw $7, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: vpsllw $9, %xmm0, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsllw $9, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatconstant_funnnel_v16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsrlw $7, %ymm1, %ymm1
; AVX2-NEXT: vpsllw $9, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: splatconstant_funnnel_v16i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsrlw $7, %ymm1, %ymm1
; AVX512F-NEXT: vpsllw $9, %ymm0, %ymm0
; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_funnnel_v16i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrlw $7, %ymm1, %ymm1
; AVX512VL-NEXT: vpsllw $9, %ymm0, %ymm0
; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v16i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsrlw $7, %ymm1, %ymm1
; AVX512BW-NEXT: vpsllw $9, %ymm0, %ymm0
; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i16:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512VBMI2-NEXT: vpshrdw $7, %zmm0, %zmm1, %zmm0
; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v16i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsrlw $7, %ymm1, %ymm1
; AVX512VLBW-NEXT: vpsllw $9, %ymm0, %ymm0
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
; AVX10-LABEL: splatconstant_funnnel_v16i16:
; AVX10: # %bb.0:
; AVX10-NEXT: vpshrdw $7, %ymm0, %ymm1, %ymm0
; AVX10-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_funnnel_v16i16:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpsrlw $7, %xmm1, %xmm2
; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; XOPAVX1-NEXT: vpsrlw $7, %xmm1, %xmm1
; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; XOPAVX1-NEXT: vpsllw $9, %xmm0, %xmm2
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; XOPAVX1-NEXT: vpsllw $9, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; XOPAVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: splatconstant_funnnel_v16i16:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpsrlw $7, %ymm1, %ymm1
; XOPAVX2-NEXT: vpsllw $9, %ymm0, %ymm0
; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
  %res = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
  ret <16 x i16> %res
}

; 256-bit funnel shift right of 8-bit lanes by a splat amount of 4.
; In the recorded output every configuration uses 16-bit shifts by 4
; (vpsrlw on ymm1, vpsllw on ymm0) and then merges the two results through a
; mask or bit-select step - vpand with 15 / 240 byte masks plus an OR, a
; vpternlogd with a memory operand, or a vpcmov with a memory operand.
; No configuration in this test emits a vpshrd* instruction for the byte case.
define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwind {
; AVX1-LABEL: splatconstant_funnnel_v32i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpsrlw $4, %xmm2, %xmm2
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpsllw $4, %xmm2, %xmm2
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpsllw $4, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatconstant_funnnel_v32i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX2-NEXT: vpsllw $4, %ymm0, %ymm0
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: splatconstant_funnnel_v32i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm0
; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm2))
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_funnnel_v32i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm0
; AVX512VL-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (mem & (ymm0 ^ ymm2))
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v32i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX512BW-NEXT: vpsrlw $4, %ymm1, %ymm0
; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm2))
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512BW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v32i8:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX512VBMI2-NEXT: vpsrlw $4, %ymm1, %ymm0
; AVX512VBMI2-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm2))
; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v32i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX512VLBW-NEXT: vpsrlw $4, %ymm1, %ymm0
; AVX512VLBW-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (mem & (ymm0 ^ ymm2))
; AVX512VLBW-NEXT: retq
;
; AVX10-LABEL: splatconstant_funnnel_v32i8:
; AVX10: # %bb.0:
; AVX10-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX10-NEXT: vpsrlw $4, %ymm1, %ymm0
; AVX10-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (mem & (ymm0 ^ ymm2))
; AVX10-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_funnnel_v32i8:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpsrlw $4, %xmm1, %xmm2
; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; XOPAVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; XOPAVX1-NEXT: vpsllw $4, %xmm0, %xmm2
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; XOPAVX1-NEXT: vpsllw $4, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; XOPAVX1-NEXT: vpcmov {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: splatconstant_funnnel_v32i8:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpsrlw $4, %ymm1, %ymm1
; XOPAVX2-NEXT: vpsllw $4, %ymm0, %ymm0
; XOPAVX2-NEXT: vpcmov {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
  %res = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>)
  ret <32 x i8> %res
}