; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2

; AVX128 tests:

define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
; SSE2-LABEL: vsel_float:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_float:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_float:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; SSE41-NEXT: retq
;
; AVX-LABEL: vsel_float:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX-NEXT: retq
entry:
  %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x float> %v1, <4 x float> %v2
  ret <4 x float> %vsel
}

define <4 x float> @vsel_float2(<4 x float> %v1, <4 x float> %v2) {
; SSE2-LABEL: vsel_float2:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_float2:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_float2:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: vsel_float2:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: retq
entry:
  %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x float> %v1, <4 x float> %v2
  ret <4 x float> %vsel
}

define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
; SSE2-LABEL: vsel_4xi8:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movaps {{.*#+}} xmm2 = [255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SSE2-NEXT: andps %xmm2, %xmm0
; SSE2-NEXT: andnps %xmm1, %xmm2
; SSE2-NEXT: orps %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_4xi8:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,5,6,u,u,u,u,u,u,u,u,u,u,u,u]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_4xi8:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: movss {{.*#+}} xmm0 = [255,255,0,255,0,0,0,0,0,0,0,0,0,0,0,0]
; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: vsel_4xi8:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovd {{.*#+}} xmm2 = [255,255,0,255,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
entry:
  %vsel = select <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i8> %v1, <4 x i8> %v2
  ret <4 x i8> %vsel
}

define <4 x i16> @vsel_4xi16(<4 x i16> %v1, <4 x i16> %v2) {
; SSE2-LABEL: vsel_4xi16:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movaps {{.*#+}} xmm2 = [65535,0,65535,65535,65535,65535,65535,65535]
; SSE2-NEXT: andps %xmm2, %xmm0
; SSE2-NEXT: andnps %xmm1, %xmm2
; SSE2-NEXT: orps %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_4xi16:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movaps {{.*#+}} xmm2 = [65535,0,65535,65535,65535,65535,65535,65535]
; SSSE3-NEXT: andps %xmm2, %xmm0
; SSSE3-NEXT: andnps %xmm1, %xmm2
; SSSE3-NEXT: orps %xmm2, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_4xi16:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
; SSE41-NEXT: retq
;
; AVX-LABEL: vsel_4xi16:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
; AVX-NEXT: retq
entry:
  %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x i16> %v1, <4 x i16> %v2
  ret <4 x i16> %vsel
}

define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) {
; SSE2-LABEL: vsel_i32:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_i32:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_i32:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; SSE41-NEXT: retq
;
; AVX-LABEL: vsel_i32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX-NEXT: retq
entry:
  %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> %v1, <4 x i32> %v2
  ret <4 x i32> %vsel
}

define <2 x double> @vsel_double(<2 x double> %v1, <2 x double> %v2) {
; SSE2-LABEL: vsel_double:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_double:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_double:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: vsel_double:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX-NEXT: retq
entry:
  %vsel = select <2 x i1> <i1 true, i1 false>, <2 x double> %v1, <2 x double> %v2
  ret <2 x double> %vsel
}

define <2 x i64> @vsel_i64(<2 x i64> %v1, <2 x i64> %v2) {
; SSE2-LABEL: vsel_i64:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_i64:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_i64:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: vsel_i64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX-NEXT: retq
entry:
  %vsel = select <2 x i1> <i1 true, i1 false>, <2 x i64> %v1, <2 x i64> %v2
  ret <2 x i64> %vsel
}

define <8 x i16> @vsel_8xi16(<8 x i16> %v1, <8 x i16> %v2) {
; SSE2-LABEL: vsel_8xi16:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movaps {{.*#+}} xmm2 = [0,65535,65535,65535,0,65535,65535,65535]
; SSE2-NEXT: andps %xmm2, %xmm1
; SSE2-NEXT: andnps %xmm0, %xmm2
; SSE2-NEXT: orps %xmm1, %xmm2
; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_8xi16:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movaps {{.*#+}} xmm2 = [0,65535,65535,65535,0,65535,65535,65535]
; SSSE3-NEXT: andps %xmm2, %xmm1
; SSSE3-NEXT: andnps %xmm0, %xmm2
; SSSE3-NEXT: orps %xmm1, %xmm2
; SSSE3-NEXT: movaps %xmm2, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_8xi16:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4],xmm1[5,6,7]
; SSE41-NEXT: retq
;
; AVX-LABEL: vsel_8xi16:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4],xmm1[5,6,7]
; AVX-NEXT: retq
entry:
  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i16> %v1, <8 x i16> %v2
  ret <8 x i16> %vsel
}

define <16 x i8> @vsel_i8(<16 x i8> %v1, <16 x i8> %v2) {
; SSE2-LABEL: vsel_i8:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movaps {{.*#+}} xmm2 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
; SSE2-NEXT: andps %xmm2, %xmm1
; SSE2-NEXT: andnps %xmm0, %xmm2
; SSE2-NEXT: orps %xmm1, %xmm2
; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_i8:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[12],zero,zero,zero
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,xmm1[1,2,3],zero,xmm1[5,6,7],zero,xmm1[9,10,11],zero,xmm1[13,14,15]
; SSSE3-NEXT: por %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_i8:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: vsel_i8:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: vsel_i8:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
; AVX2-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
entry:
  %vsel = select <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <16 x i8> %v1, <16 x i8> %v2
  ret <16 x i8> %vsel
}


; AVX256 tests:

define <8 x float> @vsel_float8(<8 x float> %v1, <8 x float> %v2) {
; SSE2-LABEL: vsel_float8:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
; SSE2-NEXT: movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: movaps %xmm3, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_float8:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
; SSSE3-NEXT: movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
; SSSE3-NEXT: movaps %xmm2, %xmm0
; SSSE3-NEXT: movaps %xmm3, %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_float8:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: vsel_float8:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
; AVX-NEXT: retq
entry:
  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x float> %v1, <8 x float> %v2
  ret <8 x float> %vsel
}

define <8 x i32> @vsel_i328(<8 x i32> %v1, <8 x i32> %v2) {
; SSE2-LABEL: vsel_i328:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
; SSE2-NEXT: movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: movaps %xmm3, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_i328:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
; SSSE3-NEXT: movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
; SSSE3-NEXT: movaps %xmm2, %xmm0
; SSSE3-NEXT: movaps %xmm3, %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_i328:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: vsel_i328:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
; AVX-NEXT: retq
entry:
  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i32> %v1, <8 x i32> %v2
  ret <8 x i32> %vsel
}

define <8 x double> @vsel_double8(<8 x double> %v1, <8 x double> %v2) {
; SSE2-LABEL: vsel_double8:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movaps %xmm7, %xmm3
; SSE2-NEXT: movaps %xmm5, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3]
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_double8:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movaps %xmm7, %xmm3
; SSSE3-NEXT: movaps %xmm5, %xmm1
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3]
; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_double8:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: movaps %xmm7, %xmm3
; SSE41-NEXT: movaps %xmm5, %xmm1
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3]
; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: vsel_double8:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm2[2,3,4,5,6,7]
; AVX-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],ymm3[2,3,4,5,6,7]
; AVX-NEXT: retq
entry:
  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x double> %v1, <8 x double> %v2
  ret <8 x double> %vsel
}

define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) {
; SSE2-LABEL: vsel_i648:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movaps %xmm7, %xmm3
; SSE2-NEXT: movaps %xmm5, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3]
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_i648:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movaps %xmm7, %xmm3
; SSSE3-NEXT: movaps %xmm5, %xmm1
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3]
; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_i648:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: movaps %xmm7, %xmm3
; SSE41-NEXT: movaps %xmm5, %xmm1
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3]
; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: vsel_i648:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm2[2,3,4,5,6,7]
; AVX-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],ymm3[2,3,4,5,6,7]
; AVX-NEXT: retq
entry:
  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i64> %v1, <8 x i64> %v2
  ret <8 x i64> %vsel
}

define <4 x double> @vsel_double4(<4 x double> %v1, <4 x double> %v2) {
; SSE2-LABEL: vsel_double4:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_double4:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_double4:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: vsel_double4:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; AVX-NEXT: retq
entry:
  %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x double> %v1, <4 x double> %v2
  ret <4 x double> %vsel
}

; testa/testb: fcmp + select (max/min patterns) should lower to cmp + blend.

define <2 x double> @testa(<2 x double> %x, <2 x double> %y) {
; SSE2-LABEL: testa:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movapd %xmm1, %xmm2
; SSE2-NEXT: cmplepd %xmm0, %xmm2
; SSE2-NEXT: andpd %xmm2, %xmm0
; SSE2-NEXT: andnpd %xmm1, %xmm2
; SSE2-NEXT: orpd %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: testa:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movapd %xmm1, %xmm2
; SSSE3-NEXT: cmplepd %xmm0, %xmm2
; SSSE3-NEXT: andpd %xmm2, %xmm0
; SSSE3-NEXT: andnpd %xmm1, %xmm2
; SSSE3-NEXT: orpd %xmm2, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: testa:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: movapd %xmm0, %xmm2
; SSE41-NEXT: movapd %xmm1, %xmm0
; SSE41-NEXT: cmplepd %xmm2, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1
; SSE41-NEXT: movapd %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: testa:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcmplepd %xmm0, %xmm1, %xmm2
; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
entry:
  %max_is_x = fcmp oge <2 x double> %x, %y
  %max = select <2 x i1> %max_is_x, <2 x double> %x, <2 x double> %y
  ret <2 x double> %max
}

define <2 x double> @testb(<2 x double> %x, <2 x double> %y) {
; SSE2-LABEL: testb:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movapd %xmm1, %xmm2
; SSE2-NEXT: cmpnlepd %xmm0, %xmm2
; SSE2-NEXT: andpd %xmm2, %xmm0
; SSE2-NEXT: andnpd %xmm1, %xmm2
; SSE2-NEXT: orpd %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: testb:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movapd %xmm1, %xmm2
; SSSE3-NEXT: cmpnlepd %xmm0, %xmm2
; SSSE3-NEXT: andpd %xmm2, %xmm0
; SSSE3-NEXT: andnpd %xmm1, %xmm2
; SSSE3-NEXT: orpd %xmm2, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: testb:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: movapd %xmm0, %xmm2
; SSE41-NEXT: movapd %xmm1, %xmm0
; SSE41-NEXT: cmpnlepd %xmm2, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1
; SSE41-NEXT: movapd %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: testb:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcmpnlepd %xmm0, %xmm1, %xmm2
; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
entry:
  %min_is_x = fcmp ult <2 x double> %x, %y
  %min = select <2 x i1> %min_is_x, <2 x double> %x, <2 x double> %y
  ret <2 x double> %min
}

; If we can figure out a blend has a constant mask, we should emit the
; blend instruction with an immediate mask
define <4 x double> @constant_blendvpd_avx(<4 x double> %xy, <4 x double> %ab) {
; SSE2-LABEL: constant_blendvpd_avx:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: constant_blendvpd_avx:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movaps %xmm2, %xmm0
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: constant_blendvpd_avx:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: movaps %xmm2, %xmm0
; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: constant_blendvpd_avx:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5],ymm1[6,7]
; AVX-NEXT: retq
entry:
  %select = select <4 x i1> <i1 false, i1 false, i1 true, i1 false>, <4 x double> %xy, <4 x double> %ab
  ret <4 x double> %select
}

define <8 x float> @constant_blendvps_avx(<8 x float> %xyzw, <8 x float> %abcd) {
; SSE2-LABEL: constant_blendvps_avx:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm2[2,0]
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm0[2,0]
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm3[2,0]
; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,1],xmm1[2,0]
; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: movaps %xmm3, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: constant_blendvps_avx:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm2[2,0]
; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm0[2,0]
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm3[2,0]
; SSSE3-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,1],xmm1[2,0]
; SSSE3-NEXT: movaps %xmm2, %xmm0
; SSSE3-NEXT: movaps %xmm3, %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: constant_blendvps_avx:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[3]
; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[3]
; SSE41-NEXT: retq
;
; AVX-LABEL: constant_blendvps_avx:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3],ymm1[4,5,6],ymm0[7]
; AVX-NEXT: retq
entry:
  %select = select <8 x i1> <i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true>, <8 x float> %xyzw, <8 x float> %abcd
  ret <8 x float> %select
}

define <32 x i8> @constant_pblendvb_avx2(<32 x i8> %xyzw, <32 x i8> %abcd) {
; SSE2-LABEL: constant_pblendvb_avx2:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movaps {{.*#+}} xmm4 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
; SSE2-NEXT: movaps %xmm4, %xmm5
; SSE2-NEXT: andnps %xmm0, %xmm5
; SSE2-NEXT: andps %xmm4, %xmm2
; SSE2-NEXT: orps %xmm2, %xmm5
; SSE2-NEXT: andps %xmm4, %xmm3
; SSE2-NEXT: andnps %xmm1, %xmm4
; SSE2-NEXT: orps %xmm3, %xmm4
; SSE2-NEXT: movaps %xmm5, %xmm0
; SSE2-NEXT: movaps %xmm4, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: constant_pblendvb_avx2:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [128,128,2,128,4,5,6,128,128,128,10,128,12,13,14,128]
; SSSE3-NEXT: pshufb %xmm4, %xmm0
; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [0,1,128,3,128,128,128,7,8,9,128,11,128,128,128,15]
; SSSE3-NEXT: pshufb %xmm5, %xmm2
; SSSE3-NEXT: por %xmm2, %xmm0
; SSSE3-NEXT: pshufb %xmm4, %xmm1
; SSSE3-NEXT: pshufb %xmm5, %xmm3
; SSSE3-NEXT: por %xmm3, %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: constant_pblendvb_avx2:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: movdqa %xmm0, %xmm4
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm4
; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm1
; SSE41-NEXT: movdqa %xmm4, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: constant_pblendvb_avx2:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
; AVX1-NEXT: vandnps %ymm0, %ymm2, %ymm0
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vorps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: constant_pblendvb_avx2:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
entry:
  %select = select <32 x i1> <i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false>, <32 x i8> %xyzw, <32 x i8> %abcd
  ret <32 x i8> %select
}

declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>)
declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>)

;; 4 tests for shufflevectors that optimize to blend + immediate
define <4 x float> @blend_shufflevector_4xfloat(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: blend_shufflevector_4xfloat:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: blend_shufflevector_4xfloat:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: blend_shufflevector_4xfloat:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; SSE41-NEXT: retq
;
; AVX-LABEL: blend_shufflevector_4xfloat:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX-NEXT: retq
entry:
  %select = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %select
}

define <8 x float> @blend_shufflevector_8xfloat(<8 x float> %a, <8 x float> %b) {
; SSE2-LABEL: blend_shufflevector_8xfloat:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm3[3,0]
; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,1],xmm1[0,2]
; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: movaps %xmm3, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: blend_shufflevector_8xfloat:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm3[3,0]
; SSSE3-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,1],xmm1[0,2]
; SSSE3-NEXT: movaps %xmm2, %xmm0
; SSSE3-NEXT: movaps %xmm3, %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: blend_shufflevector_8xfloat:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm3[0,1],xmm1[2],xmm3[3]
; SSE41-NEXT: retq
;
; AVX-LABEL: blend_shufflevector_8xfloat:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5],ymm0[6],ymm1[7]
; AVX-NEXT: retq
entry:
  %select = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 6, i32 15>
  ret <8 x float> %select
}

define <4 x double> @blend_shufflevector_4xdouble(<4 x double> %a, <4 x double> %b) {
; SSE2-LABEL: blend_shufflevector_4xdouble:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: blend_shufflevector_4xdouble:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: blend_shufflevector_4xdouble:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: blend_shufflevector_4xdouble:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
; AVX-NEXT: retq
entry:
  %select = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
  ret <4 x double> %select
}

define <4 x i64> @blend_shufflevector_4xi64(<4 x i64> %a, <4 x i64> %b) {
; SSE2-LABEL: blend_shufflevector_4xi64:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movaps %xmm3, %xmm1
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: blend_shufflevector_4xi64:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movaps %xmm3, %xmm1
; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: blend_shufflevector_4xi64:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: movaps %xmm3, %xmm1
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: blend_shufflevector_4xi64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5,6,7]
; AVX-NEXT: retq
entry:
  %select = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
  ret <4 x i64> %select
}

; Sign-bit driven and/andn/or blend patterns should lower to variable blends.

define <4 x i32> @blend_logic_v4i32(<4 x i32> %b, <4 x i32> %a, <4 x i32> %c) {
; SSE2-LABEL: blend_logic_v4i32:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: psrad $31, %xmm0
; SSE2-NEXT: pand %xmm0, %xmm1
; SSE2-NEXT: pandn %xmm2, %xmm0
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: blend_logic_v4i32:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: psrad $31, %xmm0
; SSSE3-NEXT: pand %xmm0, %xmm1
; SSSE3-NEXT: pandn %xmm2, %xmm0
; SSSE3-NEXT: por %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: blend_logic_v4i32:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: psrad $31, %xmm0
; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: blend_logic_v4i32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT: retq
entry:
  %b.lobit = ashr <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
  %sub = sub nsw <4 x i32> zeroinitializer, %a
  %0 = xor <4 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1>
  %1 = and <4 x i32> %c, %0
  %2 = and <4 x i32> %a, %b.lobit
  %cond = or <4 x i32> %1, %2
  ret <4 x i32> %cond
}

define <8 x i32> @blend_logic_v8i32(<8 x i32> %b, <8 x i32> %a, <8 x i32> %c) {
; SSE2-LABEL: blend_logic_v8i32:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: psrad $31, %xmm0
; SSE2-NEXT: psrad $31, %xmm1
; SSE2-NEXT: pand %xmm1, %xmm3
; SSE2-NEXT: pandn %xmm5, %xmm1
; SSE2-NEXT: por %xmm3, %xmm1
; SSE2-NEXT: pand %xmm0, %xmm2
; SSE2-NEXT: pandn %xmm4, %xmm0
; SSE2-NEXT: por %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: blend_logic_v8i32:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: psrad $31, %xmm0
; SSSE3-NEXT: psrad $31, %xmm1
; SSSE3-NEXT: pand %xmm1, %xmm3
; SSSE3-NEXT: pandn %xmm5, %xmm1
; SSSE3-NEXT: por %xmm3, %xmm1
; SSSE3-NEXT: pand %xmm0, %xmm2
; SSSE3-NEXT: pandn %xmm4, %xmm0
; SSSE3-NEXT: por %xmm2, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: blend_logic_v8i32:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: psrad $31, %xmm1
; SSE41-NEXT: psrad $31, %xmm0
; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm4
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm5
; SSE41-NEXT: movdqa %xmm4, %xmm0
; SSE41-NEXT: movdqa %xmm5, %xmm1
; SSE41-NEXT: retq
;
; AVX1-LABEL: blend_logic_v8i32:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vpsrad $31, %xmm0, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
; AVX1-NEXT: vandnps %ymm2, %ymm0, %ymm2
; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vorps %ymm0, %ymm2, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: blend_logic_v8i32:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
; AVX2-NEXT: retq
entry:
  %b.lobit = ashr <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  %sub = sub nsw <8 x i32> zeroinitializer, %a
  %0 = xor <8 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  %1 = and <8 x i32> %c, %0
  %2 = and <8 x i32> %a, %b.lobit
  %cond = or <8 x i32> %1, %2
  ret <8 x i32> %cond
}

define <4 x i32> @blend_neg_logic_v4i32(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: blend_neg_logic_v4i32:
; SSE: # %bb.0: # %entry
; SSE-NEXT: psrad $31, %xmm1
; SSE-NEXT: pxor %xmm1, %xmm0
; SSE-NEXT: psubd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: blend_neg_logic_v4i32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vpsrad $31, %xmm1, %xmm1
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
  %b.lobit = ashr <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
  %sub = sub nsw <4 x i32> zeroinitializer, %a
  %0 = xor <4 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1>
  %1 = and <4 x i32> %a, %0
  %2 = and <4 x i32> %b.lobit, %sub
  %cond = or <4 x i32> %1, %2
  ret <4 x i32> %cond
}

define <8 x i32> @blend_neg_logic_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE-LABEL: blend_neg_logic_v8i32:
; SSE: # %bb.0: # %entry
; SSE-NEXT: psrad $31, %xmm3
; SSE-NEXT: psrad $31, %xmm2
; SSE-NEXT: pxor %xmm2, %xmm0
; SSE-NEXT: psubd %xmm2, %xmm0
; SSE-NEXT: pxor %xmm3, %xmm1
; SSE-NEXT: psubd %xmm3, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: blend_neg_logic_v8i32:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT: vpsrad $31, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpsubd %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpsubd %xmm0, %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT: vandnps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: blend_neg_logic_v8i32:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpsrad $31, %ymm1, %ymm1
; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsubd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
entry:
  %b.lobit = ashr <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  %sub = sub nsw <8 x i32> zeroinitializer, %a
  %0 = xor <8 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  %1 = and <8 x i32> %a, %0
  %2 = and <8 x i32> %b.lobit, %sub
  %cond = or <8 x i32> %1, %2
  ret <8 x i32> %cond
}

define <4 x i32> @blend_neg_logic_v4i32_2(<4 x i32> %v, <4 x i32> %c) {
; SSE-LABEL: blend_neg_logic_v4i32_2:
; SSE: # %bb.0: # %entry
; SSE-NEXT: psrad $31, %xmm1
; SSE-NEXT: pxor %xmm1, %xmm0
; SSE-NEXT: psubd %xmm0, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: blend_neg_logic_v4i32_2:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vpsrad $31, %xmm1, %xmm1
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsubd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
entry:
  %0 = ashr <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31>
  %1 = trunc <4 x i32> %0 to <4 x i1>
  %2 = sub nsw <4 x i32> zeroinitializer, %v
  %3 = select <4 x i1> %1, <4 x i32> %v, <4 x i32> %2
  ret <4 x i32> %3
}