; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2

; PR66101 - Fold select (sext m), (add X, C), X --> (add X, (and C, (sext m)))
define <4 x i32> @masked_select_const(<4 x i32> %a, <4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: masked_select_const:
; SSE: # %bb.0:
; SSE-NEXT: pcmpgtd %xmm2, %xmm1
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT: paddd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: masked_select_const:
; AVX1: # %bb.0:
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: masked_select_const:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [4294967272,4294967272,4294967272,4294967272]
; AVX2-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpand %xmm3, %xmm1, %xmm1
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
  %sub.i = add <4 x i32> %a, <i32 -24, i32 -24, i32 -24, i32 -24>
  %cmp.i = icmp sgt <4 x i32> %x, %y
  %sel = select <4 x i1> %cmp.i, <4 x i32> %sub.i, <4 x i32> %a
  ret <4 x i32> %sel
}

; Verify that we don't emit packed vector shift instructions if the
; condition used by the vector select is a vector of constants.

define <4 x float> @test1(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: test1:
; SSE2: # %bb.0:
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test1:
; SSE41: # %bb.0:
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; SSE41-NEXT: retq
;
; AVX-LABEL: test1:
; AVX: # %bb.0:
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX-NEXT: retq
  %1 = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x float> %a, <4 x float> %b
  ret <4 x float> %1
}

define <4 x float> @test2(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: test2:
; SSE2: # %bb.0:
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test2:
; SSE41: # %bb.0:
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: test2:
; AVX: # %bb.0:
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX-NEXT: retq
  %1 = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x float> %a, <4 x float> %b
  ret <4 x float> %1
}

define <4 x float> @test3(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: test3:
; SSE2: # %bb.0:
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test3:
; SSE41: # %bb.0:
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: test3:
; AVX: # %bb.0:
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX-NEXT: retq
  %1 = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x float> %a, <4 x float> %b
  ret <4 x float> %1
}

define <4 x float> @test4(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test4:
; SSE: # %bb.0:
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test4:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = select <4 x i1> <i1 false, i1 false, i1 false, i1 false>, <4 x float> %a, <4 x float> %b
  ret <4 x float> %1
}

define <4 x float> @test5(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test5:
; SSE: # %bb.0:
; SSE-NEXT: retq
;
; AVX-LABEL: test5:
; AVX: # %bb.0:
; AVX-NEXT: retq
  %1 = select <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %b
  ret <4 x float> %1
}

define <8 x i16> @test6(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: test6:
; SSE: # %bb.0:
; SSE-NEXT: retq
;
; AVX-LABEL: test6:
; AVX: # %bb.0:
; AVX-NEXT: retq
  %1 = select <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <8 x i16> %a, <8 x i16> %a
  ret <8 x i16> %1
}

define <8 x i16> @test7(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: test7:
; SSE2: # %bb.0:
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test7:
; SSE41: # %bb.0:
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: test7:
; AVX: # %bb.0:
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX-NEXT: retq
  %1 = select <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %1
}

define <8 x i16> @test8(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: test8:
; SSE2: # %bb.0:
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test8:
; SSE41: # %bb.0:
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: test8:
; AVX: # %bb.0:
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX-NEXT: retq
  %1 = select <8 x i1> <i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %1
}

define <8 x i16> @test9(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: test9:
; SSE: # %bb.0:
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test9:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = select <8 x i1> <i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %1
}

define <8 x i16> @test10(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: test10:
; SSE: # %bb.0:
; SSE-NEXT: retq
;
; AVX-LABEL: test10:
; AVX: # %bb.0:
; AVX-NEXT: retq
  %1 = select <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %1
}

define <8 x i16> @test11(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: test11:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps {{.*#+}} xmm2 = [0,65535,65535,0,0,65535,65535,0]
; SSE2-NEXT: andps %xmm2, %xmm0
; SSE2-NEXT: andnps %xmm1, %xmm2
; SSE2-NEXT: orps %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test11:
; SSE41: # %bb.0:
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3,4],xmm0[5,6],xmm1[7]
; SSE41-NEXT: retq
;
; AVX-LABEL: test11:
; AVX: # %bb.0:
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3,4],xmm0[5,6],xmm1[7]
; AVX-NEXT: retq
  %1 = select <8 x i1> <i1 false, i1 true, i1 true, i1 false, i1 undef, i1 true, i1 true, i1 undef>, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %1
}

define <8 x i16> @test12(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: test12:
; SSE: # %bb.0:
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test12:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = select <8 x i1> <i1 false, i1 false, i1 undef, i1 false, i1 false, i1 false, i1 false, i1 undef>, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %1
}

define <8 x i16> @test13(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: test13:
; SSE: # %bb.0:
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test13:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = select <8 x i1> <i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef>, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %1
}

; Fold (vselect (build_vector AllOnes), N1, N2) -> N1
define <4 x float> @test14(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test14:
; SSE: # %bb.0:
; SSE-NEXT: retq
;
; AVX-LABEL: test14:
; AVX: # %bb.0:
; AVX-NEXT: retq
  %1 = select <4 x i1> <i1 true, i1 undef, i1 true, i1 undef>, <4 x float> %a, <4 x float> %b
  ret <4 x float> %1
}

define <8 x i16> @test15(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: test15:
; SSE: # %bb.0:
; SSE-NEXT: retq
;
; AVX-LABEL: test15:
; AVX: # %bb.0:
; AVX-NEXT: retq
  %1 = select <8 x i1> <i1 true, i1 true, i1 true, i1 undef, i1 undef, i1 true, i1 true, i1 undef>, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %1
}

; Fold (vselect (build_vector AllZeros), N1, N2) -> N2
define <4 x float> @test16(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test16:
; SSE: # %bb.0:
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test16:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = select <4 x i1> <i1 false, i1 undef, i1 false, i1 undef>, <4 x float> %a, <4 x float> %b
  ret <4 x float> %1
}

define <8 x i16> @test17(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: test17:
; SSE: # %bb.0:
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test17:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = select <8 x i1> <i1 false, i1 false, i1 false, i1 undef, i1 undef, i1 false, i1 false, i1 undef>, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %1
}

define <4 x float> @test18(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: test18:
; SSE2: # %bb.0:
; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test18:
; SSE41: # %bb.0:
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: test18:
; AVX: # %bb.0:
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT: retq
  %1 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %b
  ret <4 x float> %1
}

define <4 x i32> @test19(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: test19:
; SSE2: # %bb.0:
; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test19:
; SSE41: # %bb.0:
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: test19:
; AVX: # %bb.0:
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT: retq
  %1 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %1
}

define <2 x double> @test20(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: test20:
; SSE2: # %bb.0:
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test20:
; SSE41: # %bb.0:
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: test20:
; AVX: # %bb.0:
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX-NEXT: retq
  %1 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %b
  ret <2 x double> %1
}

define <2 x i64> @test21(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: test21:
; SSE2: # %bb.0:
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test21:
; SSE41: # %bb.0:
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: test21:
; AVX: # %bb.0:
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX-NEXT: retq
  %1 = select <2 x i1> <i1 false, i1 true>, <2 x i64> %a, <2 x i64> %b
  ret <2 x i64> %1
}

define <4 x float> @test22(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: test22:
; SSE2: # %bb.0:
; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test22:
; SSE41: # %bb.0:
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: test22:
; AVX: # %bb.0:
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: retq
  %1 = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x float> %a, <4 x float> %b
  ret <4 x float> %1
}

define <4 x i32> @test23(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: test23:
; SSE2: # %bb.0:
; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test23:
; SSE41: # %bb.0:
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: test23:
; AVX: # %bb.0:
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: retq
  %1 = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %1
}

define <2 x double> @test24(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: test24:
; SSE2: # %bb.0:
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test24:
; SSE41: # %bb.0:
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: test24:
; AVX: # %bb.0:
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX-NEXT: retq
  %1 = select <2 x i1> <i1 true, i1 false>, <2 x double> %a, <2 x double> %b
  ret <2 x double> %1
}

define <2 x i64> @test25(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: test25:
; SSE2: # %bb.0:
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test25:
; SSE41: # %bb.0:
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: test25:
; AVX: # %bb.0:
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX-NEXT: retq
  %1 = select <2 x i1> <i1 true, i1 false>, <2 x i64> %a, <2 x i64> %b
  ret <2 x i64> %1
}

define <16 x i8> @test26(<16 x i8> %a, <16 x i8> %b) {
; SSE2-LABEL: test26:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE2-NEXT: andps %xmm2, %xmm1
; SSE2-NEXT: andnps %xmm0, %xmm2
; SSE2-NEXT: orps %xmm1, %xmm2
; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test26:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: test26:
; AVX1: # %bb.0:
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test26:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastw {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; AVX2-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
  %1 = select <16 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <16 x i8> %a, <16 x i8> %b
  ret <16 x i8> %1
}

define <32 x i8> @test27(<32 x i8> %a, <32 x i8> %b) {
; SSE2-LABEL: test27:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps {{.*#+}} xmm4 = [255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255]
; SSE2-NEXT: movaps %xmm4, %xmm5
; SSE2-NEXT: andnps %xmm2, %xmm5
; SSE2-NEXT: andps %xmm4, %xmm0
; SSE2-NEXT: orps %xmm5, %xmm0
; SSE2-NEXT: andps %xmm4, %xmm1
; SSE2-NEXT: andnps %xmm3, %xmm4
; SSE2-NEXT: orps %xmm4, %xmm1
; SSE2-NEXT: retq
;
; SSE41-LABEL: test27:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm4
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255]
; SSE41-NEXT: pblendvb %xmm0, %xmm4, %xmm2
; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm3
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: movdqa %xmm3, %xmm1
; SSE41-NEXT: retq
;
; AVX1-LABEL: test27:
; AVX1: # %bb.0:
; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255]
; AVX1-NEXT: vandnps %ymm1, %ymm2, %ymm1
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test27:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255]
; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
  %1 = select <32 x i1> <i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true>, <32 x i8> %a, <32 x i8> %b
  ret <32 x i8> %1
}

define <4 x float> @select_of_shuffles_0(<2 x float> %a0, <2 x float> %b0, <2 x float> %a1, <2 x float> %b1) {
; SSE-LABEL: select_of_shuffles_0:
; SSE: # %bb.0:
; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE-NEXT: subps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: select_of_shuffles_0:
; AVX: # %bb.0:
; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; AVX-NEXT: vsubps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = shufflevector <2 x float> %a0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %2 = shufflevector <2 x float> %a1, <2 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
  %3 = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x float> %2, <4 x float> %1
  %4 = shufflevector <2 x float> %b0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %5 = shufflevector <2 x float> %b1, <2 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
  %6 = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x float> %5, <4 x float> %4
  %7 = fsub <4 x float> %3, %6
  ret <4 x float> %7
}

; PR20677
define <16 x double> @select_illegal(<16 x double> %a, <16 x double> %b) {
; SSE-LABEL: select_illegal:
; SSE: # %bb.0:
; SSE-NEXT: movq %rdi, %rax
; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm4
; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm5
; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm6
; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm7
; SSE-NEXT: movaps %xmm7, 112(%rdi)
; SSE-NEXT: movaps %xmm6, 96(%rdi)
; SSE-NEXT: movaps %xmm5, 80(%rdi)
; SSE-NEXT: movaps %xmm4, 64(%rdi)
; SSE-NEXT: movaps %xmm3, 48(%rdi)
; SSE-NEXT: movaps %xmm2, 32(%rdi)
; SSE-NEXT: movaps %xmm1, 16(%rdi)
; SSE-NEXT: movaps %xmm0, (%rdi)
; SSE-NEXT: retq
;
; AVX-LABEL: select_illegal:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps %ymm7, %ymm3
; AVX-NEXT: vmovaps %ymm6, %ymm2
; AVX-NEXT: retq
  %sel = select <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <16 x double> %a, <16 x double> %b
  ret <16 x double> %sel
}

; Make sure we can optimize the condition MSB when it is used by 2 selects.
; The v2i1 here will be passed as v2i64 and we will emit a sign_extend_inreg to fill the upper bits.
; We should be able to remove the sra from the sign_extend_inreg to leave only shl.
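; That is, the sign_extend_inreg lowers to (sra (shl X, 63), 63); since blendvpd
; only reads the sign bit of each lane, the sra should be redundant, which is why
; the SSE41/AVX checks below expect a lone psllq $63 (SSE2 still materializes the
; full mask for its pand/pandn sequence).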
define <2 x i64> @shrunkblend_2uses(<2 x i1> %cond, <2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i64> %d) {
; SSE2-LABEL: shrunkblend_2uses:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
; SSE2-NEXT: pslld $31, %xmm0
; SSE2-NEXT: psrad $31, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm5
; SSE2-NEXT: pandn %xmm2, %xmm5
; SSE2-NEXT: pand %xmm0, %xmm1
; SSE2-NEXT: por %xmm1, %xmm5
; SSE2-NEXT: pand %xmm0, %xmm3
; SSE2-NEXT: pandn %xmm4, %xmm0
; SSE2-NEXT: por %xmm3, %xmm0
; SSE2-NEXT: paddq %xmm5, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: shrunkblend_2uses:
; SSE41: # %bb.0:
; SSE41-NEXT: psllq $63, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm4
; SSE41-NEXT: paddq %xmm2, %xmm4
; SSE41-NEXT: movdqa %xmm4, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: shrunkblend_2uses:
; AVX: # %bb.0:
; AVX-NEXT: vpsllq $63, %xmm0, %xmm0
; AVX-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm1
; AVX-NEXT: vblendvpd %xmm0, %xmm3, %xmm4, %xmm0
; AVX-NEXT: vpaddq %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %x = select <2 x i1> %cond, <2 x i64> %a, <2 x i64> %b
  %y = select <2 x i1> %cond, <2 x i64> %c, <2 x i64> %d
  %z = add <2 x i64> %x, %y
  ret <2 x i64> %z
}

; Similar to above, but the condition has a use that isn't a condition of a vselect, so we can't optimize.
define <2 x i64> @shrunkblend_nonvselectuse(<2 x i1> %cond, <2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i64> %d) {
; SSE2-LABEL: shrunkblend_nonvselectuse:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,2,2]
; SSE2-NEXT: pslld $31, %xmm3
; SSE2-NEXT: psrad $31, %xmm3
; SSE2-NEXT: movdqa %xmm3, %xmm0
; SSE2-NEXT: pandn %xmm2, %xmm0
; SSE2-NEXT: pand %xmm3, %xmm1
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: paddq %xmm3, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: shrunkblend_nonvselectuse:
; SSE41: # %bb.0:
; SSE41-NEXT: psllq $63, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE41-NEXT: psrad $31, %xmm0
; SSE41-NEXT: paddq %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: shrunkblend_nonvselectuse:
; AVX: # %bb.0:
; AVX-NEXT: vpsllq $63, %xmm0, %xmm0
; AVX-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm1
; AVX-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; AVX-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm0
; AVX-NEXT: vpaddq %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %x = select <2 x i1> %cond, <2 x i64> %a, <2 x i64> %b
  %y = sext <2 x i1> %cond to <2 x i64>
  %z = add <2 x i64> %x, %y
  ret <2 x i64> %z
}

; This turns into a SHRUNKBLEND with SSE4 or later, and via
; late shuffle magic, both sides of the blend are the same
; value. If that is not simplified before isel, it can fail
; to match (crash).
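; (In this IR, %y and %p18 both simplify to <%x, %x>, so the two blend inputs
; end up being identical.)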
define <2 x i32> @simplify_select(i32 %x, <2 x i1> %z) {
; SSE2-LABEL: simplify_select:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pslld $31, %xmm0
; SSE2-NEXT: psrad $31, %xmm0
; SSE2-NEXT: movd %edi, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,0,1,1]
; SSE2-NEXT: por %xmm1, %xmm2
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0,0]
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm2[1,1]
; SSE2-NEXT: pand %xmm0, %xmm2
; SSE2-NEXT: pandn %xmm1, %xmm0
; SSE2-NEXT: por %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: simplify_select:
; SSE41: # %bb.0:
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE41-NEXT: pslld $31, %xmm0
; SSE41-NEXT: movd %edi, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,0,1,1]
; SSE41-NEXT: por %xmm1, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,1,1]
; SSE41-NEXT: pinsrd $1, %edi, %xmm1
; SSE41-NEXT: blendvps %xmm0, %xmm2, %xmm1
; SSE41-NEXT: movaps %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: simplify_select:
; AVX: # %bb.0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX-NEXT: vpslld $31, %xmm0, %xmm0
; AVX-NEXT: vmovd %edi, %xmm1
; AVX-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,0,1,1]
; AVX-NEXT: vpor %xmm1, %xmm2, %xmm1
; AVX-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,1,1]
; AVX-NEXT: vpinsrd $1, %edi, %xmm2, %xmm2
; AVX-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT: retq
  %a = insertelement <2 x i32> <i32 0, i32 undef>, i32 %x, i32 1
  %b = insertelement <2 x i32> <i32 undef, i32 0>, i32 %x, i32 0
  %y = or <2 x i32> %a, %b
  %p16 = extractelement <2 x i32> %y, i32 1
  %p17 = insertelement <2 x i32> undef, i32 %p16, i32 0
  %p18 = insertelement <2 x i32> %p17, i32 %x, i32 1
  %r = select <2 x i1> %z, <2 x i32> %y, <2 x i32> %p18
  ret <2 x i32> %r
}

; Test to make sure we don't try to insert a new setcc to swap the operands
; of a select with an all-zeros LHS if the setcc has additional users.
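; Based on the checks below, the expected lowering materializes the mask once
; (pand + pcmpeqd) and reuses it: pandn implements the all-zeros-LHS select and
; pand the all-zeros-RHS one.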
define void @vselect_allzeros_LHS_multiple_use_setcc(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, ptr %p1, ptr %p2) {
; SSE2-LABEL: vselect_allzeros_LHS_multiple_use_setcc:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [1,2,4,8]
; SSE2-NEXT: pand %xmm3, %xmm0
; SSE2-NEXT: pcmpeqd %xmm3, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: pandn %xmm1, %xmm3
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm3, (%rdi)
; SSE2-NEXT: movdqa %xmm0, (%rsi)
; SSE2-NEXT: retq
;
; SSE41-LABEL: vselect_allzeros_LHS_multiple_use_setcc:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovsxbd {{.*#+}} xmm3 = [1,2,4,8]
; SSE41-NEXT: pand %xmm3, %xmm0
; SSE41-NEXT: pcmpeqd %xmm3, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm3
; SSE41-NEXT: pandn %xmm1, %xmm3
; SSE41-NEXT: pand %xmm2, %xmm0
; SSE41-NEXT: movdqa %xmm3, (%rdi)
; SSE41-NEXT: movdqa %xmm0, (%rsi)
; SSE41-NEXT: retq
;
; AVX-LABEL: vselect_allzeros_LHS_multiple_use_setcc:
; AVX: # %bb.0:
; AVX-NEXT: vpmovsxbd {{.*#+}} xmm3 = [1,2,4,8]
; AVX-NEXT: vpand %xmm3, %xmm0, %xmm0
; AVX-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
; AVX-NEXT: vpandn %xmm1, %xmm0, %xmm1
; AVX-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX-NEXT: vmovdqa %xmm1, (%rdi)
; AVX-NEXT: vmovdqa %xmm0, (%rsi)
; AVX-NEXT: retq
  %and = and <4 x i32> %x, <i32 1, i32 2, i32 4, i32 8>
  %cond = icmp ne <4 x i32> %and, zeroinitializer
  %sel1 = select <4 x i1> %cond, <4 x i32> zeroinitializer, <4 x i32> %y
  %sel2 = select <4 x i1> %cond, <4 x i32> %z, <4 x i32> zeroinitializer
  store <4 x i32> %sel1, ptr %p1
  store <4 x i32> %sel2, ptr %p2
  ret void
}

; This test case previously crashed after r363802, r363850, and r363856 due to
; any_extend_vector_inreg not being handled by the X86 backend.
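; Only lane 0 of the wide select is consumed, so the whole sequence should
; collapse to scalar code: take the low byte of the compare mask, andl $1, and
; shll $15 to yield either 32768 or 0.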
define i64 @vselect_any_extend_vector_inreg_crash(ptr %x) {
; SSE-LABEL: vselect_any_extend_vector_inreg_crash:
; SSE: # %bb.0:
; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: movd %xmm0, %eax
; SSE-NEXT: andl $1, %eax
; SSE-NEXT: shll $15, %eax
; SSE-NEXT: retq
;
; AVX1-LABEL: vselect_any_extend_vector_inreg_crash:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: shll $15, %eax
; AVX1-NEXT: retq
;
; AVX2-LABEL: vselect_any_extend_vector_inreg_crash:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [49,49,49,49]
; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: shll $15, %eax
; AVX2-NEXT: retq
0:
  %1 = load <8 x i8>, ptr %x
  %2 = icmp eq <8 x i8> %1, <i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49>
  %3 = select <8 x i1> %2, <8 x i64> <i64 32768, i64 16384, i64 8192, i64 4096, i64 2048, i64 1024, i64 512, i64 256>, <8 x i64> zeroinitializer
  %4 = extractelement <8 x i64> %3, i32 0
  ret i64 %4
}

; Tests the scalarizeBinOp code in DAGCombiner
define void @scalarize_binop(<1 x i1> %a) {
; SSE-LABEL: scalarize_binop:
; SSE: # %bb.0: # %bb0
; SSE-NEXT: .p2align 4
; SSE-NEXT: .LBB35_1: # %bb1
; SSE-NEXT: # =>This Inner Loop Header: Depth=1
; SSE-NEXT: jmp .LBB35_1
;
; AVX-LABEL: scalarize_binop:
; AVX: # %bb.0: # %bb0
; AVX-NEXT: .p2align 4
; AVX-NEXT: .LBB35_1: # %bb1
; AVX-NEXT: # =>This Inner Loop Header: Depth=1
; AVX-NEXT: jmp .LBB35_1
bb0:
  br label %bb1

bb1:
  %b = select <1 x i1> %a, <1 x i1> zeroinitializer, <1 x i1> splat (i1 true)
  br label %bb2

bb2:
  %c = extractelement <1 x i1> %b, i32 0
  br label %bb1
}