; NOTE(review): Machine-generated FileCheck test for llvm.fshl.v2i32 rotate lowering (variable, splat-variable, constant, and splat-constant rotate amounts) across SSE2/SSE4.1/AVX/AVX2/AVX512 variants/XOP plus one i686 run. Do not hand-edit the CHECK lines; change the IR and regenerate the assertions with utils/update_llc_test_checks.py.
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE2 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE41 4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX1 5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX2 6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512F 7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=AVX512VL 8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=AVX512BW 9; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=AVX512VLBW 10; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi,+avx512vbmi2 | FileCheck %s --check-prefix=AVX512VBMI2 11; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefix=AVX512VLVBMI2 12; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefixes=XOP,XOPAVX1 13; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=XOP,XOPAVX2 14 15; Just one 32-bit run to make sure we do reasonable things for i64 cases. 
16; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86-SSE2 17 18declare <2 x i32> @llvm.fshl.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) 19 20; 21; Variable Shifts 22; 23 24define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind { 25; SSE2-LABEL: var_funnnel_v2i32: 26; SSE2: # %bb.0: 27; SSE2-NEXT: pslld $23, %xmm1 28; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 29; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 30; SSE2-NEXT: cvttps2dq %xmm1, %xmm1 31; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] 32; SSE2-NEXT: pmuludq %xmm1, %xmm0 33; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3] 34; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 35; SSE2-NEXT: pmuludq %xmm2, %xmm1 36; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3] 37; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] 38; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 39; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 40; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 41; SSE2-NEXT: por %xmm3, %xmm0 42; SSE2-NEXT: retq 43; 44; SSE41-LABEL: var_funnnel_v2i32: 45; SSE41: # %bb.0: 46; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] 47; SSE41-NEXT: pslld $23, %xmm1 48; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 49; SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 50; SSE41-NEXT: cvttps2dq %xmm1, %xmm1 51; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3] 52; SSE41-NEXT: pmuludq %xmm2, %xmm3 53; SSE41-NEXT: pmuludq %xmm1, %xmm0 54; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 55; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7] 56; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2] 57; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7] 58; SSE41-NEXT: por %xmm1, %xmm0 59; SSE41-NEXT: retq 60; 61; AVX1-LABEL: var_funnnel_v2i32: 62; AVX1: # %bb.0: 63; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] 64; AVX1-NEXT: vpslld $23, 
%xmm1, %xmm1 65; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 66; AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 67; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1 68; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[1,1,3,3] 69; AVX1-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 70; AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 71; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 72; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] 73; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,2,2] 74; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7] 75; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 76; AVX1-NEXT: retq 77; 78; AVX2-LABEL: var_funnnel_v2i32: 79; AVX2: # %bb.0: 80; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31] 81; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 82; AVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm2 83; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [32,32,32,32] 84; AVX2-NEXT: vpsubd %xmm1, %xmm3, %xmm1 85; AVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 86; AVX2-NEXT: vpor %xmm0, %xmm2, %xmm0 87; AVX2-NEXT: retq 88; 89; AVX512F-LABEL: var_funnnel_v2i32: 90; AVX512F: # %bb.0: 91; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 92; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 93; AVX512F-NEXT: vprolvd %zmm1, %zmm0, %zmm0 94; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 95; AVX512F-NEXT: vzeroupper 96; AVX512F-NEXT: retq 97; 98; AVX512VL-LABEL: var_funnnel_v2i32: 99; AVX512VL: # %bb.0: 100; AVX512VL-NEXT: vprolvd %xmm1, %xmm0, %xmm0 101; AVX512VL-NEXT: retq 102; 103; AVX512BW-LABEL: var_funnnel_v2i32: 104; AVX512BW: # %bb.0: 105; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 106; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 107; AVX512BW-NEXT: vprolvd %zmm1, %zmm0, %zmm0 108; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 109; AVX512BW-NEXT: vzeroupper 110; AVX512BW-NEXT: retq 111; 112; AVX512VLBW-LABEL: var_funnnel_v2i32: 113; AVX512VLBW: # %bb.0: 114; AVX512VLBW-NEXT: vprolvd 
%xmm1, %xmm0, %xmm0 115; AVX512VLBW-NEXT: retq 116; 117; AVX512VBMI2-LABEL: var_funnnel_v2i32: 118; AVX512VBMI2: # %bb.0: 119; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 120; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 121; AVX512VBMI2-NEXT: vprolvd %zmm1, %zmm0, %zmm0 122; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 123; AVX512VBMI2-NEXT: vzeroupper 124; AVX512VBMI2-NEXT: retq 125; 126; AVX512VLVBMI2-LABEL: var_funnnel_v2i32: 127; AVX512VLVBMI2: # %bb.0: 128; AVX512VLVBMI2-NEXT: vprolvd %xmm1, %xmm0, %xmm0 129; AVX512VLVBMI2-NEXT: retq 130; 131; XOP-LABEL: var_funnnel_v2i32: 132; XOP: # %bb.0: 133; XOP-NEXT: vprotd %xmm1, %xmm0, %xmm0 134; XOP-NEXT: retq 135; 136; X86-SSE2-LABEL: var_funnnel_v2i32: 137; X86-SSE2: # %bb.0: 138; X86-SSE2-NEXT: pslld $23, %xmm1 139; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 140; X86-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 141; X86-SSE2-NEXT: cvttps2dq %xmm1, %xmm1 142; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] 143; X86-SSE2-NEXT: pmuludq %xmm1, %xmm0 144; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3] 145; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 146; X86-SSE2-NEXT: pmuludq %xmm2, %xmm1 147; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3] 148; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] 149; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 150; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 151; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 152; X86-SSE2-NEXT: por %xmm3, %xmm0 153; X86-SSE2-NEXT: retl 154 %res = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> %amt) 155 ret <2 x i32> %res 156} 157 158; 159; Uniform Variable Shifts 160; 161 162define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind { 163; SSE2-LABEL: splatvar_funnnel_v2i32: 164; SSE2: # %bb.0: 165; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 166; 
SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,2,3,3] 167; SSE2-NEXT: psllq %xmm1, %xmm2 168; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] 169; SSE2-NEXT: psllq %xmm1, %xmm0 170; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm2[1,3] 171; SSE2-NEXT: retq 172; 173; SSE41-LABEL: splatvar_funnnel_v2i32: 174; SSE41: # %bb.0: 175; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 176; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,2,3,3] 177; SSE41-NEXT: psllq %xmm1, %xmm2 178; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] 179; SSE41-NEXT: psllq %xmm1, %xmm0 180; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm2[1,3] 181; SSE41-NEXT: retq 182; 183; AVX1-LABEL: splatvar_funnnel_v2i32: 184; AVX1: # %bb.0: 185; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 186; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,2,3,3] 187; AVX1-NEXT: vpsllq %xmm1, %xmm2, %xmm2 188; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] 189; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0 190; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm2[1,3] 191; AVX1-NEXT: retq 192; 193; AVX2-LABEL: splatvar_funnnel_v2i32: 194; AVX2: # %bb.0: 195; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 196; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,2,3,3] 197; AVX2-NEXT: vpsllq %xmm1, %xmm2, %xmm2 198; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] 199; AVX2-NEXT: vpsllq %xmm1, %xmm0, %xmm0 200; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm2[1,3] 201; AVX2-NEXT: retq 202; 203; AVX512F-LABEL: splatvar_funnnel_v2i32: 204; AVX512F: # %bb.0: 205; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 206; AVX512F-NEXT: vpbroadcastd %xmm1, %xmm1 207; AVX512F-NEXT: vprolvd %zmm1, %zmm0, %zmm0 208; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 209; AVX512F-NEXT: vzeroupper 210; AVX512F-NEXT: retq 211; 212; AVX512VL-LABEL: splatvar_funnnel_v2i32: 213; AVX512VL: # %bb.0: 214; AVX512VL-NEXT: vpbroadcastd %xmm1, %xmm1 215; AVX512VL-NEXT: vprolvd %xmm1, %xmm0, %xmm0 216; AVX512VL-NEXT: retq 217; 
218; AVX512BW-LABEL: splatvar_funnnel_v2i32: 219; AVX512BW: # %bb.0: 220; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 221; AVX512BW-NEXT: vpbroadcastd %xmm1, %xmm1 222; AVX512BW-NEXT: vprolvd %zmm1, %zmm0, %zmm0 223; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 224; AVX512BW-NEXT: vzeroupper 225; AVX512BW-NEXT: retq 226; 227; AVX512VLBW-LABEL: splatvar_funnnel_v2i32: 228; AVX512VLBW: # %bb.0: 229; AVX512VLBW-NEXT: vpbroadcastd %xmm1, %xmm1 230; AVX512VLBW-NEXT: vprolvd %xmm1, %xmm0, %xmm0 231; AVX512VLBW-NEXT: retq 232; 233; AVX512VBMI2-LABEL: splatvar_funnnel_v2i32: 234; AVX512VBMI2: # %bb.0: 235; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 236; AVX512VBMI2-NEXT: vpbroadcastd %xmm1, %xmm1 237; AVX512VBMI2-NEXT: vprolvd %zmm1, %zmm0, %zmm0 238; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 239; AVX512VBMI2-NEXT: vzeroupper 240; AVX512VBMI2-NEXT: retq 241; 242; AVX512VLVBMI2-LABEL: splatvar_funnnel_v2i32: 243; AVX512VLVBMI2: # %bb.0: 244; AVX512VLVBMI2-NEXT: vpbroadcastd %xmm1, %xmm1 245; AVX512VLVBMI2-NEXT: vprolvd %xmm1, %xmm0, %xmm0 246; AVX512VLVBMI2-NEXT: retq 247; 248; XOPAVX1-LABEL: splatvar_funnnel_v2i32: 249; XOPAVX1: # %bb.0: 250; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] 251; XOPAVX1-NEXT: vprotd %xmm1, %xmm0, %xmm0 252; XOPAVX1-NEXT: retq 253; 254; XOPAVX2-LABEL: splatvar_funnnel_v2i32: 255; XOPAVX2: # %bb.0: 256; XOPAVX2-NEXT: vpbroadcastd %xmm1, %xmm1 257; XOPAVX2-NEXT: vprotd %xmm1, %xmm0, %xmm0 258; XOPAVX2-NEXT: retq 259; 260; X86-SSE2-LABEL: splatvar_funnnel_v2i32: 261; X86-SSE2: # %bb.0: 262; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 263; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,2,3,3] 264; X86-SSE2-NEXT: psllq %xmm1, %xmm2 265; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] 266; X86-SSE2-NEXT: psllq %xmm1, %xmm0 267; X86-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm2[1,3] 268; X86-SSE2-NEXT: retl 269 %splat = shufflevector <2 x i32> %amt, <2 x i32> undef, <2 x 
i32> zeroinitializer 270 %res = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> %splat) 271 ret <2 x i32> %res 272} 273 274; 275; Constant Shifts 276; 277 278define <2 x i32> @constant_funnnel_v2i32(<2 x i32> %x) nounwind { 279; SSE2-LABEL: constant_funnnel_v2i32: 280; SSE2: # %bb.0: 281; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 282; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 283; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3] 284; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 285; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3] 286; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 287; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 288; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 289; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 290; SSE2-NEXT: por %xmm2, %xmm0 291; SSE2-NEXT: retq 292; 293; SSE41-LABEL: constant_funnnel_v2i32: 294; SSE41: # %bb.0: 295; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 296; SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 297; SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 298; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] 299; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 300; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] 301; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 302; SSE41-NEXT: por %xmm2, %xmm0 303; SSE41-NEXT: retq 304; 305; AVX1-LABEL: constant_funnnel_v2i32: 306; AVX1: # %bb.0: 307; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 308; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 309; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 310; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] 311; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 312; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] 313; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = 
xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 314; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 315; AVX1-NEXT: retq 316; 317; AVX2-LABEL: constant_funnnel_v2i32: 318; AVX2: # %bb.0: 319; AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 320; AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 321; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 322; AVX2-NEXT: retq 323; 324; AVX512F-LABEL: constant_funnnel_v2i32: 325; AVX512F: # %bb.0: 326; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 327; AVX512F-NEXT: vpmovsxbd {{.*#+}} xmm1 = [4,5,0,0] 328; AVX512F-NEXT: vprolvd %zmm1, %zmm0, %zmm0 329; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 330; AVX512F-NEXT: vzeroupper 331; AVX512F-NEXT: retq 332; 333; AVX512VL-LABEL: constant_funnnel_v2i32: 334; AVX512VL: # %bb.0: 335; AVX512VL-NEXT: vprolvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 336; AVX512VL-NEXT: retq 337; 338; AVX512BW-LABEL: constant_funnnel_v2i32: 339; AVX512BW: # %bb.0: 340; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 341; AVX512BW-NEXT: vpmovsxbd {{.*#+}} xmm1 = [4,5,0,0] 342; AVX512BW-NEXT: vprolvd %zmm1, %zmm0, %zmm0 343; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 344; AVX512BW-NEXT: vzeroupper 345; AVX512BW-NEXT: retq 346; 347; AVX512VLBW-LABEL: constant_funnnel_v2i32: 348; AVX512VLBW: # %bb.0: 349; AVX512VLBW-NEXT: vprolvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 350; AVX512VLBW-NEXT: retq 351; 352; AVX512VBMI2-LABEL: constant_funnnel_v2i32: 353; AVX512VBMI2: # %bb.0: 354; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 355; AVX512VBMI2-NEXT: vpmovsxbd {{.*#+}} xmm1 = [4,5,0,0] 356; AVX512VBMI2-NEXT: vprolvd %zmm1, %zmm0, %zmm0 357; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 358; AVX512VBMI2-NEXT: vzeroupper 359; AVX512VBMI2-NEXT: retq 360; 361; AVX512VLVBMI2-LABEL: constant_funnnel_v2i32: 362; AVX512VLVBMI2: # %bb.0: 363; AVX512VLVBMI2-NEXT: vprolvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 364; AVX512VLVBMI2-NEXT: 
retq 365; 366; XOP-LABEL: constant_funnnel_v2i32: 367; XOP: # %bb.0: 368; XOP-NEXT: vprotd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 369; XOP-NEXT: retq 370; 371; X86-SSE2-LABEL: constant_funnnel_v2i32: 372; X86-SSE2: # %bb.0: 373; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 374; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 375; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3] 376; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 377; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3] 378; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 379; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 380; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 381; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 382; X86-SSE2-NEXT: por %xmm2, %xmm0 383; X86-SSE2-NEXT: retl 384 %res = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> <i32 4, i32 5>) 385 ret <2 x i32> %res 386} 387 388; 389; Uniform Constant Shifts 390; 391 392define <2 x i32> @splatconstant_funnnel_v2i32(<2 x i32> %x) nounwind { 393; SSE2-LABEL: splatconstant_funnnel_v2i32: 394; SSE2: # %bb.0: 395; SSE2-NEXT: movdqa %xmm0, %xmm1 396; SSE2-NEXT: psrld $28, %xmm1 397; SSE2-NEXT: pslld $4, %xmm0 398; SSE2-NEXT: por %xmm1, %xmm0 399; SSE2-NEXT: retq 400; 401; SSE41-LABEL: splatconstant_funnnel_v2i32: 402; SSE41: # %bb.0: 403; SSE41-NEXT: movdqa %xmm0, %xmm1 404; SSE41-NEXT: psrld $28, %xmm1 405; SSE41-NEXT: pslld $4, %xmm0 406; SSE41-NEXT: por %xmm1, %xmm0 407; SSE41-NEXT: retq 408; 409; AVX1-LABEL: splatconstant_funnnel_v2i32: 410; AVX1: # %bb.0: 411; AVX1-NEXT: vpsrld $28, %xmm0, %xmm1 412; AVX1-NEXT: vpslld $4, %xmm0, %xmm0 413; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 414; AVX1-NEXT: retq 415; 416; AVX2-LABEL: splatconstant_funnnel_v2i32: 417; AVX2: # %bb.0: 418; AVX2-NEXT: vpsrld $28, %xmm0, %xmm1 419; AVX2-NEXT: vpslld $4, %xmm0, %xmm0 420; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 421; AVX2-NEXT: retq 422; 423; 
AVX512F-LABEL: splatconstant_funnnel_v2i32: 424; AVX512F: # %bb.0: 425; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 426; AVX512F-NEXT: vprold $4, %zmm0, %zmm0 427; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 428; AVX512F-NEXT: vzeroupper 429; AVX512F-NEXT: retq 430; 431; AVX512VL-LABEL: splatconstant_funnnel_v2i32: 432; AVX512VL: # %bb.0: 433; AVX512VL-NEXT: vprold $4, %xmm0, %xmm0 434; AVX512VL-NEXT: retq 435; 436; AVX512BW-LABEL: splatconstant_funnnel_v2i32: 437; AVX512BW: # %bb.0: 438; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 439; AVX512BW-NEXT: vprold $4, %zmm0, %zmm0 440; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 441; AVX512BW-NEXT: vzeroupper 442; AVX512BW-NEXT: retq 443; 444; AVX512VLBW-LABEL: splatconstant_funnnel_v2i32: 445; AVX512VLBW: # %bb.0: 446; AVX512VLBW-NEXT: vprold $4, %xmm0, %xmm0 447; AVX512VLBW-NEXT: retq 448; 449; AVX512VBMI2-LABEL: splatconstant_funnnel_v2i32: 450; AVX512VBMI2: # %bb.0: 451; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 452; AVX512VBMI2-NEXT: vprold $4, %zmm0, %zmm0 453; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 454; AVX512VBMI2-NEXT: vzeroupper 455; AVX512VBMI2-NEXT: retq 456; 457; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v2i32: 458; AVX512VLVBMI2: # %bb.0: 459; AVX512VLVBMI2-NEXT: vprold $4, %xmm0, %xmm0 460; AVX512VLVBMI2-NEXT: retq 461; 462; XOP-LABEL: splatconstant_funnnel_v2i32: 463; XOP: # %bb.0: 464; XOP-NEXT: vprotd $4, %xmm0, %xmm0 465; XOP-NEXT: retq 466; 467; X86-SSE2-LABEL: splatconstant_funnnel_v2i32: 468; X86-SSE2: # %bb.0: 469; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 470; X86-SSE2-NEXT: psrld $28, %xmm1 471; X86-SSE2-NEXT: pslld $4, %xmm0 472; X86-SSE2-NEXT: por %xmm1, %xmm0 473; X86-SSE2-NEXT: retl 474 %res = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> <i32 4, i32 4>) 475 ret <2 x i32> %res 476} 477