; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=AVX512VLBW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi,+avx512vbmi2 | FileCheck %s --check-prefix=AVX512VBMI2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefix=AVX512VLVBMI2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefixes=XOP,XOPAVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=XOP,XOPAVX2

; Just one 32-bit run to make sure we do reasonable things for i64 cases.
; NOTE(review): the comment above looks copied from the i64 variant of this
; test; the payload here is v2i32 — confirm/update when regenerating.
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86-SSE2

; Every test below passes %x as both funnel-shift operands, so each call is
; really a rotate-right of %x by the shift amount (modulo 32).
declare <2 x i32> @llvm.fshr.v2i32(<2 x i32>, <2 x i32>, <2 x i32>)

;
; Variable Shifts
;

; Fully per-element variable rotate amounts.
define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind {
; SSE2-LABEL: var_funnnel_v2i32:
; SSE2: # %bb.0:
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: psubd %xmm1, %xmm2
; SSE2-NEXT: pslld $23, %xmm2
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE2-NEXT: cvttps2dq %xmm2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT: pmuludq %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT: pmuludq %xmm2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: por %xmm3, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: var_funnnel_v2i32:
; SSE41: # %bb.0:
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: psubd %xmm1, %xmm2
; SSE41-NEXT: pslld $23, %xmm2
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE41-NEXT: cvttps2dq %xmm2, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; SSE41-NEXT: pmuludq %xmm2, %xmm3
; SSE41-NEXT: pmuludq %xmm1, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: var_funnnel_v2i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpsubd %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; AVX1-NEXT: vpmuludq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,2,2]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: var_funnnel_v2i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm2
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [32,32,32,32]
; AVX2-NEXT: vpsubd %xmm1, %xmm3, %xmm1
; AVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpor %xmm0, %xmm2, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: var_funnnel_v2i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vprorvd %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: var_funnnel_v2i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vprorvd %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: var_funnnel_v2i32:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT: vprorvd %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: var_funnnel_v2i32:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vprorvd %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: var_funnnel_v2i32:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VBMI2-NEXT: vprorvd %zmm1, %zmm0, %zmm0
; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLVBMI2-LABEL: var_funnnel_v2i32:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vprorvd %xmm1, %xmm0, %xmm0
; AVX512VLVBMI2-NEXT: retq
;
; XOP-LABEL: var_funnnel_v2i32:
; XOP: # %bb.0:
; XOP-NEXT: vpxor %xmm2, %xmm2, %xmm2
; XOP-NEXT: vpsubd %xmm1, %xmm2, %xmm1
; XOP-NEXT: vprotd %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
; X86-SSE2-LABEL: var_funnnel_v2i32:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pxor %xmm2, %xmm2
; X86-SSE2-NEXT: psubd %xmm1, %xmm2
; X86-SSE2-NEXT: pslld $23, %xmm2
; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
; X86-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
; X86-SSE2-NEXT: cvttps2dq %xmm2, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; X86-SSE2-NEXT: pmuludq %xmm1, %xmm0
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X86-SSE2-NEXT: pmuludq %xmm2, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE2-NEXT: por %xmm3, %xmm0
; X86-SSE2-NEXT: retl
  %res = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> %amt)
  ret <2 x i32> %res
}

;
; Uniform Variable Shifts
;

; Rotate amount splatted from element 0 of %amt.
define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind {
; SSE2-LABEL: splatvar_funnnel_v2i32:
; SSE2: # %bb.0:
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,2,3,3]
; SSE2-NEXT: psrlq %xmm1, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT: psrlq %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
; SSE2-NEXT: retq
;
; SSE41-LABEL: splatvar_funnnel_v2i32:
; SSE41: # %bb.0:
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,2,3,3]
; SSE41-NEXT: psrlq %xmm1, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE41-NEXT: psrlq %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
; SSE41-NEXT: retq
;
; AVX1-LABEL: splatvar_funnnel_v2i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,2,3,3]
; AVX1-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatvar_funnnel_v2i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,2,3,3]
; AVX2-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX2-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
; AVX2-NEXT: retq
;
; AVX512F-LABEL: splatvar_funnnel_v2i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vpbroadcastd %xmm1, %xmm1
; AVX512F-NEXT: vprorvd %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v2i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpbroadcastd %xmm1, %xmm1
; AVX512VL-NEXT: vprorvd %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v2i32:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT: vpbroadcastd %xmm1, %xmm1
; AVX512BW-NEXT: vprorvd %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v2i32:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpbroadcastd %xmm1, %xmm1
; AVX512VLBW-NEXT: vprorvd %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v2i32:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VBMI2-NEXT: vpbroadcastd %xmm1, %xmm1
; AVX512VBMI2-NEXT: vprorvd %zmm1, %zmm0, %zmm0
; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatvar_funnnel_v2i32:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vpbroadcastd %xmm1, %xmm1
; AVX512VLVBMI2-NEXT: vprorvd %xmm1, %xmm0, %xmm0
; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_funnnel_v2i32:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsubd %xmm1, %xmm2, %xmm1
; XOPAVX1-NEXT: vprotd %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: splatvar_funnnel_v2i32:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpbroadcastd %xmm1, %xmm1
; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; XOPAVX2-NEXT: vpsubd %xmm1, %xmm2, %xmm1
; XOPAVX2-NEXT: vprotd %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT: retq
;
; X86-SSE2-LABEL: splatvar_funnnel_v2i32:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,2,3,3]
; X86-SSE2-NEXT: psrlq %xmm1, %xmm2
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; X86-SSE2-NEXT: psrlq %xmm1, %xmm0
; X86-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
; X86-SSE2-NEXT: retl
  %splat = shufflevector <2 x i32> %amt, <2 x i32> undef, <2 x i32> zeroinitializer
  %res = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> %splat)
  ret <2 x i32> %res
}

;
; Constant Shifts
;

; Distinct per-element constant rotate amounts (4 and 5).
define <2 x i32> @constant_funnnel_v2i32(<2 x i32> %x) nounwind {
; SSE2-LABEL: constant_funnnel_v2i32:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3]
; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: por %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: constant_funnnel_v2i32:
; SSE41: # %bb.0:
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; SSE41-NEXT: por %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: constant_funnnel_v2i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: constant_funnnel_v2i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: constant_funnnel_v2i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vpmovsxbd {{.*#+}} xmm1 = [4,5,0,0]
; AVX512F-NEXT: vprorvd %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: constant_funnnel_v2i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: constant_funnnel_v2i32:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT: vpmovsxbd {{.*#+}} xmm1 = [4,5,0,0]
; AVX512BW-NEXT: vprorvd %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: constant_funnnel_v2i32:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: constant_funnnel_v2i32:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VBMI2-NEXT: vpmovsxbd {{.*#+}} xmm1 = [4,5,0,0]
; AVX512VBMI2-NEXT: vprorvd %zmm1, %zmm0, %zmm0
; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLVBMI2-LABEL: constant_funnnel_v2i32:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512VLVBMI2-NEXT: retq
;
; XOP-LABEL: constant_funnnel_v2i32:
; XOP: # %bb.0:
; XOP-NEXT: vprotd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT: retq
;
; X86-SSE2-LABEL: constant_funnnel_v2i32:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3]
; X86-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE2-NEXT: por %xmm2, %xmm0
; X86-SSE2-NEXT: retl
  %res = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> <i32 4, i32 5>)
  ret <2 x i32> %res
}

;
; Uniform Constant Shifts
;

; Same constant rotate amount (4) in every element.
define <2 x i32> @splatconstant_funnnel_v2i32(<2 x i32> %x) nounwind {
; SSE2-LABEL: splatconstant_funnnel_v2i32:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrld $4, %xmm1
; SSE2-NEXT: pslld $28, %xmm0
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: splatconstant_funnnel_v2i32:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrld $4, %xmm1
; SSE41-NEXT: pslld $28, %xmm0
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: splatconstant_funnnel_v2i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsrld $4, %xmm0, %xmm1
; AVX1-NEXT: vpslld $28, %xmm0, %xmm0
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatconstant_funnnel_v2i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsrld $4, %xmm0, %xmm1
; AVX2-NEXT: vpslld $28, %xmm0, %xmm0
; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: splatconstant_funnnel_v2i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vprord $4, %zmm0, %zmm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_funnnel_v2i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vprord $4, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v2i32:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT: vprord $4, %zmm0, %zmm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v2i32:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vprord $4, %xmm0, %xmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v2i32:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VBMI2-NEXT: vprord $4, %zmm0, %zmm0
; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v2i32:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vprord $4, %xmm0, %xmm0
; AVX512VLVBMI2-NEXT: retq
;
; XOP-LABEL: splatconstant_funnnel_v2i32:
; XOP: # %bb.0:
; XOP-NEXT: vprotd $28, %xmm0, %xmm0
; XOP-NEXT: retq
;
; X86-SSE2-LABEL: splatconstant_funnnel_v2i32:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: psrld $4, %xmm1
; X86-SSE2-NEXT: pslld $28, %xmm0
; X86-SSE2-NEXT: por %xmm1, %xmm0
; X86-SSE2-NEXT: retl
  %res = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> <i32 4, i32 4>)
  ret <2 x i32> %res
}