; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 < %s | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx2 < %s | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq,+avx512bw < %s | FileCheck %s --check-prefixes=CHECK,AVX,AVX512

define i32 @and_self(i32 %x) {
; CHECK-LABEL: and_self:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    retq
  %and = and i32 %x, %x
  ret i32 %and
}

define <4 x i32> @and_self_vec(<4 x i32> %x) {
; CHECK-LABEL: and_self_vec:
; CHECK:       # %bb.0:
; CHECK-NEXT:    retq
  %and = and <4 x i32> %x, %x
  ret <4 x i32> %and
}

;
; Verify that the DAGCombiner is able to fold a vector AND into a blend
; if one of the operands to the AND is a vector of all constants, and each
; constant element is either zero or all-ones.
;

define <4 x i32> @test1(<4 x i32> %A) {
; SSE-LABEL: test1:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: test1:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 -1, i32 0, i32 0, i32 0>
  ret <4 x i32> %1
}

define <4 x i32> @test2(<4 x i32> %A) {
; SSE-LABEL: test2:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: test2:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
; AVX-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 0, i32 -1, i32 0, i32 0>
  ret <4 x i32> %1
}

define <4 x i32> @test3(<4 x i32> %A) {
; SSE-LABEL: test3:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2],xmm1[3]
; SSE-NEXT:    retq
;
; AVX-LABEL: test3:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2],xmm1[3]
; AVX-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 0, i32 0, i32 -1, i32 0>
  ret <4 x i32> %1
}

define <4 x i32> @test4(<4 x i32> %A) {
; SSE-LABEL: test4:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
; SSE-NEXT:    retq
;
; AVX-LABEL: test4:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
; AVX-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 0, i32 0, i32 0, i32 -1>
  ret <4 x i32> %1
}

define <4 x i32> @test5(<4 x i32> %A) {
; SSE-LABEL: test5:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; SSE-NEXT:    retq
;
; AVX-LABEL: test5:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 -1, i32 0, i32 -1, i32 0>
  ret <4 x i32> %1
}

define <4 x i32> @test6(<4 x i32> %A) {
; SSE-LABEL: test6:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
; SSE-NEXT:    retq
;
; AVX-LABEL: test6:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
; AVX-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 0, i32 -1, i32 0, i32 -1>
  ret <4 x i32> %1
}

define <4 x i32> @test7(<4 x i32> %A) {
; SSE-LABEL: test7:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: test7:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 0, i32 0, i32 -1, i32 -1>
  ret <4 x i32> %1
}

define <4 x i32> @test8(<4 x i32> %A) {
; SSE-LABEL: test8:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
; SSE-NEXT:    retq
;
; AVX-LABEL: test8:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
; AVX-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 -1, i32 0, i32 0, i32 -1>
  ret <4 x i32> %1
}

define <4 x i32> @test9(<4 x i32> %A) {
; SSE-LABEL: test9:
; SSE:       # %bb.0:
; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-NEXT:    retq
;
; AVX-LABEL: test9:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 -1, i32 -1, i32 0, i32 0>
  ret <4 x i32> %1
}

define <4 x i32> @test10(<4 x i32> %A) {
; SSE-LABEL: test10:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3]
; SSE-NEXT:    retq
;
; AVX-LABEL: test10:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3]
; AVX-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 0, i32 -1, i32 -1, i32 0>
  ret <4 x i32> %1
}

define <4 x i32> @test11(<4 x i32> %A) {
; SSE-LABEL: test11:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: test11:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 0, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %1
}

define <4 x i32> @test12(<4 x i32> %A) {
; SSE-LABEL: test12:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
; SSE-NEXT:    retq
;
; AVX-LABEL: test12:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
; AVX-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 -1, i32 -1, i32 -1, i32 0>
  ret <4 x i32> %1
}

define <4 x i32> @test13(<4 x i32> %A) {
; SSE-LABEL: test13:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; SSE-NEXT:    retq
;
; AVX-LABEL: test13:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; AVX-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 -1, i32 -1, i32 0, i32 -1>
  ret <4 x i32> %1
}

define <4 x i32> @test14(<4 x i32> %A) {
; SSE-LABEL: test14:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: test14:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; AVX-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 -1, i32 0, i32 -1, i32 -1>
  ret <4 x i32> %1
}

; X & undef must fold to 0. So lane 0 must choose from the zero vector.

define <4 x i32> @undef_lane(<4 x i32> %x) {
; SSE-LABEL: undef_lane:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
; SSE-NEXT:    retq
;
; AVX-LABEL: undef_lane:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
; AVX-NEXT:    retq
  %r = and <4 x i32> %x, <i32 undef, i32 4294967295, i32 0, i32 4294967295>
  ret <4 x i32> %r
}

define <4 x i32> @test15(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: test15:
; SSE:       # %bb.0:
; SSE-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: test15:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; AVX-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 -1, i32 0, i32 -1, i32 -1>
  %2 = and <4 x i32> %B, <i32 0, i32 -1, i32 0, i32 0>
  %3 = or <4 x i32> %1, %2
  ret <4 x i32> %3
}

define <4 x i32> @test16(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: test16:
; SSE:       # %bb.0:
; SSE-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; SSE-NEXT:    retq
;
; AVX-LABEL: test16:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 -1, i32 0, i32 -1, i32 0>
  %2 = and <4 x i32> %B, <i32 0, i32 -1, i32 0, i32 -1>
  %3 = or <4 x i32> %1, %2
  ret <4 x i32> %3
}

define <4 x i32> @test17(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: test17:
; SSE:       # %bb.0:
; SSE-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
; SSE-NEXT:    retq
;
; AVX-LABEL: test17:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
; AVX-NEXT:    retq
  %1 = and <4 x i32> %A, <i32 0, i32 -1, i32 0, i32 -1>
  %2 = and <4 x i32> %B, <i32 -1, i32 0, i32 -1, i32 0>
  %3 = or <4 x i32> %1, %2
  ret <4 x i32> %3
}

;
; fold (and (or x, C), D) -> D if (C & D) == D
;

define <2 x i64> @and_or_v2i64(<2 x i64> %a0) {
; SSE-LABEL: and_or_v2i64:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [8,8]
; SSE-NEXT:    retq
;
; AVX-LABEL: and_or_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = [8,8]
; AVX-NEXT:    # xmm0 = mem[0,0]
; AVX-NEXT:    retq
  %1 = or <2 x i64> %a0, <i64 255, i64 255>
  %2 = and <2 x i64> %1, <i64 8, i64 8>
  ret <2 x i64> %2
}

define <4 x i32> @and_or_v4i32(<4 x i32> %a0) {
; SSE-LABEL: and_or_v4i32:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [3,3,3,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: and_or_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm0 = [3,3,3,3]
; AVX-NEXT:    retq
  %1 = or <4 x i32> %a0, <i32 15, i32 15, i32 15, i32 15>
  %2 = and <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %2
}

define <8 x i16> @and_or_v8i16(<8 x i16> %a0) {
; SSE-LABEL: and_or_v8i16:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [15,7,3,1,14,10,2,32767]
; SSE-NEXT:    retq
;
; AVX-LABEL: and_or_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [15,7,3,1,14,10,2,32767]
; AVX-NEXT:    retq
  %1 = or <8 x i16> %a0, <i16 255, i16 127, i16 63, i16 31, i16 15, i16 31, i16 63, i16 -1>
  %2 = and <8 x i16> %1, <i16 15, i16 7, i16 3, i16 1, i16 14, i16 10, i16 2, i16 32767>
  ret <8 x i16> %2
}

;
; Check we merge and(ext(and(x,c1)),c2) before an and gets folded to a shuffle clear mask
;

define <8 x i32> @clear_sext_and(<8 x i16> %x) {
; SSE-LABEL: clear_sext_and:
; SSE:       # %bb.0:
; SSE-NEXT:    pmovsxwd %xmm0, %xmm2
; SSE-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE-NEXT:    pmovsxwd %xmm0, %xmm1
; SSE-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: clear_sext_and:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovsxwd %xmm0, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX1-NEXT:    vpmovsxwd %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: clear_sext_and:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: clear_sext_and:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX512-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX512-NEXT:    retq
  %1 = and <8 x i16> %x, <i16 -1, i16 3, i16 7, i16 15, i16 31, i16 63, i16 127, i16 -1>
  %2 = sext <8 x i16> %1 to <8 x i32>
  %3 = and <8 x i32> %2, <i32 -1, i32 0, i32 -1, i32 0, i32 0, i32 -1, i32 -1, i32 -1>
  ret <8 x i32> %3
}

define <8 x i32> @clear_zext_and(<8 x i16> %x) {
; SSE-LABEL: clear_zext_and:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSE-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: clear_zext_and:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: clear_zext_and:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: clear_zext_and:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX512-NEXT:    retq
  %1 = and <8 x i16> %x, <i16 -1, i16 3, i16 7, i16 15, i16 31, i16 63, i16 127, i16 -1>
  %2 = zext <8 x i16> %1 to <8 x i32>
  %3 = and <8 x i32> %2, <i32 -1, i32 0, i32 -1, i32 0, i32 0, i32 -1, i32 -1, i32 -1>
  ret <8 x i32> %3
}

;
; known bits folding
;

define <2 x i64> @and_or_zext_v2i32(<2 x i32> %a0) {
; SSE-LABEL: and_or_zext_v2i32:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: and_or_zext_v2i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = zext <2 x i32> %a0 to <2 x i64>
  %2 = or <2 x i64> %1, <i64 1, i64 1>
  %3 = and <2 x i64> %2, <i64 4294967296, i64 4294967296>
  ret <2 x i64> %3
}

define <4 x i32> @and_or_zext_v4i16(<4 x i16> %a0) {
; SSE-LABEL: and_or_zext_v4i16:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: and_or_zext_v4i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = zext <4 x i16> %a0 to <4 x i32>
  %2 = or <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
  %3 = and <4 x i32> %2, <i32 65536, i32 65536, i32 65536, i32 65536>
  ret <4 x i32> %3
}

;
; known sign bits folding
;

define <8 x i16> @ashr_mask1_v8i16(<8 x i16> %a0) {
; SSE-LABEL: ashr_mask1_v8i16:
; SSE:       # %bb.0:
; SSE-NEXT:    psrlw $15, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: ashr_mask1_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = ashr <8 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %2 = and <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %2
}

define <4 x i32> @ashr_mask7_v4i32(<4 x i32> %a0) {
; SSE-LABEL: ashr_mask7_v4i32:
; SSE:       # %bb.0:
; SSE-NEXT:    psrad $31, %xmm0
; SSE-NEXT:    psrld $29, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: ashr_mask7_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX-NEXT:    vpsrld $29, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = ashr <4 x i32> %a0, <i32 31, i32 31, i32 31, i32 31>
  %2 = and <4 x i32> %1, <i32 7, i32 7, i32 7, i32 7>
  ret <4 x i32> %2
}

;
; SimplifyDemandedBits
;

; PR34620 - redundant PAND after vector shift of a byte vector (PSRLW)
define <16 x i8> @PR34620(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: PR34620:
; SSE:       # %bb.0:
; SSE-NEXT:    psrlw $1, %xmm0
; SSE-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    paddb %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: PR34620:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrlw $1, %xmm0, %xmm0
; AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: PR34620:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrlw $1, %xmm0, %xmm0
; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: PR34620:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsrlw $1, %xmm0, %xmm0
; AVX512-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = lshr <16 x i8> %a0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %2 = and <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %3 = add <16 x i8> %2, %a1
  ret <16 x i8> %3
}

;
; Simplify and with a broadcasted negated scalar
;

define <8 x i64> @neg_scalar_broadcast_v8i64_arg(i64 %a0, <8 x i64> %a1) {
; SSE-LABEL: neg_scalar_broadcast_v8i64_arg:
; SSE:       # %bb.0:
; SSE-NEXT:    notq %rdi
; SSE-NEXT:    movq %rdi, %xmm4
; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[0,1,0,1]
; SSE-NEXT:    pand %xmm4, %xmm0
; SSE-NEXT:    pand %xmm4, %xmm1
; SSE-NEXT:    pand %xmm4, %xmm2
; SSE-NEXT:    pand %xmm4, %xmm3
; SSE-NEXT:    retq
;
; AVX1-LABEL: neg_scalar_broadcast_v8i64_arg:
; AVX1:       # %bb.0:
; AVX1-NEXT:    notq %rdi
; AVX1-NEXT:    vmovq %rdi, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm2, %ymm2
; AVX1-NEXT:    vandps %ymm0, %ymm2, %ymm0
; AVX1-NEXT:    vandps %ymm1, %ymm2, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: neg_scalar_broadcast_v8i64_arg:
; AVX2:       # %bb.0:
; AVX2-NEXT:    notq %rdi
; AVX2-NEXT:    vmovq %rdi, %xmm2
; AVX2-NEXT:    vpbroadcastq %xmm2, %ymm2
; AVX2-NEXT:    vpand %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpand %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: neg_scalar_broadcast_v8i64_arg:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastq %rdi, %zmm1
; AVX512-NEXT:    vpandnq %zmm0, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %1 = xor i64 %a0, -1
  %2 = insertelement <8 x i64> undef, i64 %1, i64 0
  %3 = shufflevector <8 x i64> %2, <8 x i64> poison, <8 x i32> zeroinitializer
  %4 = and <8 x i64> %3, %a1
  ret <8 x i64> %4
}

define <8 x i64> @neg_scalar_broadcast_v8i64(i64 %a0, <2 x i64> %a1) {
; SSE-LABEL: neg_scalar_broadcast_v8i64:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    notq %rdi
; SSE-NEXT:    movq %rdi, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[0,1,0,1]
; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,1,0,1]
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[2,3,2,3]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; SSE-NEXT:    pand %xmm4, %xmm0
; SSE-NEXT:    pand %xmm4, %xmm1
; SSE-NEXT:    pand %xmm4, %xmm2
; SSE-NEXT:    pand %xmm4, %xmm3
; SSE-NEXT:    retq
;
; AVX1-LABEL: neg_scalar_broadcast_v8i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX1-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,3]
; AVX1-NEXT:    vmovq %rdi, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm2, %ymm2
; AVX1-NEXT:    vandnpd %ymm0, %ymm2, %ymm0
; AVX1-NEXT:    vandnpd %ymm1, %ymm2, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: neg_scalar_broadcast_v8i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[0,1,0,0]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,0,1,1]
; AVX2-NEXT:    vmovq %rdi, %xmm2
; AVX2-NEXT:    vpbroadcastq %xmm2, %ymm2
; AVX2-NEXT:    vpandn %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpandn %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: neg_scalar_broadcast_v8i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512-NEXT:    vpbroadcastq %rdi, %zmm1
; AVX512-NEXT:    vpmovsxbq {{.*#+}} zmm2 = [1,0,1,1,0,1,0,0]
; AVX512-NEXT:    vpermq %zmm0, %zmm2, %zmm0
; AVX512-NEXT:    vpandnq %zmm0, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %1 = xor i64 %a0, -1
  %2 = insertelement <8 x i64> undef, i64 %1, i64 0
  %3 = shufflevector <8 x i64> %2, <8 x i64> poison, <8 x i32> zeroinitializer
  %4 = shufflevector <2 x i64> %a1, <2 x i64> poison, <8 x i32> <i32 1, i32 0, i32 1, i32 1, i32 0, i32 1, i32 0, i32 0>
  %5 = and <8 x i64> %4, %3
  ret <8 x i64> %5
}

define <4 x i64> @neg_scalar_broadcast_v4i64_arg(i64 %a0, <4 x i64> %a1) {
; SSE-LABEL: neg_scalar_broadcast_v4i64_arg:
; SSE:       # %bb.0:
; SSE-NEXT:    notq %rdi
; SSE-NEXT:    movq %rdi, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    pand %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: neg_scalar_broadcast_v4i64_arg:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovq %rdi, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT:    vandnps %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: neg_scalar_broadcast_v4i64_arg:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovq %rdi, %xmm1
; AVX2-NEXT:    vpbroadcastq %xmm1, %ymm1
; AVX2-NEXT:    vpandn %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: neg_scalar_broadcast_v4i64_arg:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastq %rdi, %ymm1
; AVX512-NEXT:    vpandn %ymm0, %ymm1, %ymm0
; AVX512-NEXT:    retq
  %1 = xor i64 %a0, -1
  %2 = insertelement <4 x i64> undef, i64 %1, i64 0
  %3 = shufflevector <4 x i64> %2, <4 x i64> poison, <4 x i32> zeroinitializer
  %4 = and <4 x i64> %3, %a1
  ret <4 x i64> %4
}

define <4 x i64> @neg_scalar_broadcast_v4i64(i64 %a0, <2 x i64> %a1) {
; SSE-LABEL: neg_scalar_broadcast_v4i64:
; SSE:       # %bb.0:
; SSE-NEXT:    notq %rdi
; SSE-NEXT:    movq %rdi, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[0,1,0,1]
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    pand %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: neg_scalar_broadcast_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX1-NEXT:    vmovq %rdi, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,3]
; AVX1-NEXT:    vandnpd %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: neg_scalar_broadcast_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX2-NEXT:    vmovq %rdi, %xmm1
; AVX2-NEXT:    vpbroadcastq %xmm1, %ymm1
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,0,1,1]
; AVX2-NEXT:    vpandn %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: neg_scalar_broadcast_v4i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512-NEXT:    vpbroadcastq %rdi, %ymm1
; AVX512-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,0,1,1]
; AVX512-NEXT:    vpandn %ymm0, %ymm1, %ymm0
; AVX512-NEXT:    retq
  %1 = xor i64 %a0, -1
  %2 = insertelement <4 x i64> undef, i64 %1, i64 0
  %3 = shufflevector <4 x i64> %2, <4 x i64> poison, <4 x i32> zeroinitializer
  %4 = shufflevector <2 x i64> %a1, <2 x i64> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 1>
  %5 = and <4 x i64> %4, %3
  ret <4 x i64> %5
}

define <2 x i64> @neg_scalar_broadcast_v2i64(i64 %a0, <2 x i64> %a1) {
; SSE-LABEL: neg_scalar_broadcast_v2i64:
; SSE:       # %bb.0:
; SSE-NEXT:    movq %rdi, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; SSE-NEXT:    pandn %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: neg_scalar_broadcast_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovq %rdi, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; AVX1-NEXT:    vpandn %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: neg_scalar_broadcast_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovq %rdi, %xmm1
; AVX2-NEXT:    vpbroadcastq %xmm1, %xmm1
; AVX2-NEXT:    vpandn %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: neg_scalar_broadcast_v2i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastq %rdi, %xmm1
; AVX512-NEXT:    vpandn %xmm0, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %1 = xor i64 %a0, -1
  %2 = insertelement <2 x i64> undef, i64 %1, i64 0
  %3 = shufflevector <2 x i64> %2, <2 x i64> poison, <2 x i32> zeroinitializer
  %4 = and <2 x i64> %3, %a1
  ret <2 x i64> %4
}

define <2 x i64> @casted_neg_scalar_broadcast_v2i64(<2 x i32> %a0, <2 x i64> %a1) {
; SSE-LABEL: casted_neg_scalar_broadcast_v2i64:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT:    pandn %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: casted_neg_scalar_broadcast_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vandnps %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: casted_neg_scalar_broadcast_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX2-NEXT:    vandnps %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: casted_neg_scalar_broadcast_v2i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX512-NEXT:    vandnps %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = xor <2 x i32> %a0, <i32 -1, i32 -1>
  %2 = bitcast <2 x i32> %1 to i64
  %3 = insertelement <2 x i64> undef, i64 %2, i64 0
  %4 = shufflevector <2 x i64> %3, <2 x i64> poison, <2 x i32> zeroinitializer
  %5 = and <2 x i64> %4, %a1
  ret <2 x i64> %5
}

define <8 x i32> @neg_scalar_broadcast_v8i32(i32 %a0, <8 x i32> %a1) {
; SSE-LABEL: neg_scalar_broadcast_v8i32:
; SSE:       # %bb.0:
; SSE-NEXT:    notl %edi
; SSE-NEXT:    movd %edi, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    pand %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: neg_scalar_broadcast_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT:    vandnps %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: neg_scalar_broadcast_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm1
; AVX2-NEXT:    vpbroadcastd %xmm1, %ymm1
; AVX2-NEXT:    vpandn %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: neg_scalar_broadcast_v8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastd %edi, %ymm1
; AVX512-NEXT:    vpandn %ymm0, %ymm1, %ymm0
; AVX512-NEXT:    retq
  %1 = xor i32 %a0, -1
  %2 = insertelement <8 x i32> undef, i32 %1, i64 0
  %3 = shufflevector <8 x i32> %2, <8 x i32> poison, <8 x i32> zeroinitializer
  %4 = and <8 x i32> %3, %a1
  ret <8 x i32> %4
}

define <8 x i16> @neg_scalar_broadcast_v8i16(i16 %a0, <8 x i16> %a1) {
; SSE-LABEL: neg_scalar_broadcast_v8i16:
; SSE:       # %bb.0:
; SSE-NEXT:    movd %edi, %xmm1
; SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; SSE-NEXT:    pandn %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: neg_scalar_broadcast_v8i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm1
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT:    vpandn %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: neg_scalar_broadcast_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm1
; AVX2-NEXT:    vpbroadcastw %xmm1, %xmm1
; AVX2-NEXT:    vpandn %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: neg_scalar_broadcast_v8i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastw %edi, %xmm1
; AVX512-NEXT:    vpandn %xmm0, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %1 = xor i16 %a0, -1
  %2 = insertelement <8 x i16> undef, i16 %1, i64 0
  %3 = shufflevector <8 x i16> %2, <8 x i16> poison, <8 x i32> zeroinitializer
  %4 = and <8 x i16> %3, %a1
  ret <8 x i16> %4
}

define <16 x i8> @neg_scalar_broadcast_v16i8(i8 %a0, <16 x i8> %a1) {
; SSE-LABEL: neg_scalar_broadcast_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    movd %edi, %xmm1
; SSE-NEXT:    pxor %xmm2, %xmm2
; SSE-NEXT:    pshufb %xmm2, %xmm1
; SSE-NEXT:    pandn %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: neg_scalar_broadcast_v16i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpandn %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: neg_scalar_broadcast_v16i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm1
; AVX2-NEXT:    vpbroadcastb %xmm1, %xmm1
; AVX2-NEXT:    vpandn %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: neg_scalar_broadcast_v16i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastb %edi, %xmm1
; AVX512-NEXT:    vpandn %xmm0, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %1 = xor i8 %a0, -1
  %2 = insertelement <16 x i8> undef, i8 %1, i64 0
  %3 = shufflevector <16 x i8> %2, <16 x i8> poison, <16 x i32> zeroinitializer
  %4 = and <16 x i8> %3, %a1
  ret <16 x i8> %4
}

define <64 x i8> @neg_scalar_broadcast_v64i8(i8 %a0, <64 x i8> %a1) {
; SSE-LABEL: neg_scalar_broadcast_v64i8:
; SSE:       # %bb.0:
; SSE-NEXT:    notb %dil
; SSE-NEXT:    movzbl %dil, %eax
; SSE-NEXT:    movd %eax, %xmm4
; SSE-NEXT:    pxor %xmm5, %xmm5
; SSE-NEXT:    pshufb %xmm5, %xmm4
; SSE-NEXT:    pand %xmm4, %xmm0
; SSE-NEXT:    pand %xmm4, %xmm1
; SSE-NEXT:    pand %xmm4, %xmm2
; SSE-NEXT:    pand %xmm4, %xmm3
; SSE-NEXT:    retq
;
; AVX1-LABEL: neg_scalar_broadcast_v64i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    notb %dil
; AVX1-NEXT:    vmovd %edi, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpshufb %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm2, %ymm2
; AVX1-NEXT:    vandps %ymm0, %ymm2, %ymm0
; AVX1-NEXT:    vandps %ymm1, %ymm2, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: neg_scalar_broadcast_v64i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    notb %dil
; AVX2-NEXT:    vmovd %edi, %xmm2
; AVX2-NEXT:    vpbroadcastb %xmm2, %ymm2
; AVX2-NEXT:    vpand %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpand %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: neg_scalar_broadcast_v64i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastb %edi, %zmm1
; AVX512-NEXT:    vpandnq %zmm0, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %1 = xor i8 %a0, -1
  %2 = insertelement <64 x i8> undef, i8 %1, i64 0
  %3 = shufflevector <64 x i8> %2, <64 x i8> poison, <64 x i32> zeroinitializer
  %4 = and <64 x i8> %3, %a1
  ret <64 x i8> %4
}

define <8 x i64> @neg_scalar_broadcast_v64i8_v8i64(i8 %a0, <8 x i64> %a1) {
; SSE-LABEL: neg_scalar_broadcast_v64i8_v8i64:
; SSE:       # %bb.0:
; SSE-NEXT:    notb %dil
; SSE-NEXT:    movzbl %dil, %eax
; SSE-NEXT:    movd %eax, %xmm4
; SSE-NEXT:    pxor %xmm5, %xmm5
; SSE-NEXT:    pshufb %xmm5, %xmm4
; SSE-NEXT:    pand %xmm4, %xmm0
; SSE-NEXT:    pand %xmm4, %xmm1
; SSE-NEXT:    pand %xmm4, %xmm2
; SSE-NEXT:    pand %xmm4, %xmm3
; SSE-NEXT:    retq
;
; AVX1-LABEL: neg_scalar_broadcast_v64i8_v8i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    notb %dil
; AVX1-NEXT:    vmovd %edi, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpshufb %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm2, %ymm2
; AVX1-NEXT:    vandps %ymm0, %ymm2, %ymm0
; AVX1-NEXT:    vandps %ymm1, %ymm2, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: neg_scalar_broadcast_v64i8_v8i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    notb %dil
; AVX2-NEXT:    vmovd %edi, %xmm2
; AVX2-NEXT:    vpbroadcastb %xmm2, %ymm2
; AVX2-NEXT:    vpand %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpand %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: neg_scalar_broadcast_v64i8_v8i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastb %edi, %zmm1
; AVX512-NEXT:    vpandnq %zmm0, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %1 = xor i8 %a0, -1
  %2 = insertelement <64 x i8> undef, i8 %1, i64 0
  %3 = shufflevector <64 x i8> %2, <64 x i8> poison, <64 x i32> zeroinitializer
  %4 = bitcast <64 x i8> %3 to <8 x i64>
  %5 = and <8 x i64> %4, %a1
  ret <8 x i64> %5
}

define <4 x i64> @neg_scalar_broadcast_v32i8_v4i64(i8 %a0, <4 x i64> %a1) {
; SSE-LABEL: neg_scalar_broadcast_v32i8_v4i64:
; SSE:       # %bb.0:
; SSE-NEXT:    notb %dil
; SSE-NEXT:    movzbl %dil, %eax
; SSE-NEXT:    movd %eax, %xmm2
; SSE-NEXT:    pxor %xmm3, %xmm3
; SSE-NEXT:    pshufb %xmm3, %xmm2
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    pand %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: neg_scalar_broadcast_v32i8_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT:    vandnps %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: neg_scalar_broadcast_v32i8_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm1
; AVX2-NEXT:    vpbroadcastb %xmm1, %ymm1
; AVX2-NEXT:    vpandn %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: neg_scalar_broadcast_v32i8_v4i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastb %edi, %ymm1
; AVX512-NEXT:    vpandn %ymm0, %ymm1, %ymm0
; AVX512-NEXT:    retq
  %1 = xor i8 %a0, -1
  %2 = insertelement <32 x i8> undef, i8 %1, i64 0
  %3 = shufflevector <32 x i8> %2, <32 x i8> poison, <32 x i32> zeroinitializer
  %4 = bitcast <32 x i8> %3 to <4 x i64>
  %5 = and <4 x i64> %4, %a1
  ret <4 x i64> %5
}

define <2 x i64> @neg_scalar_broadcast_v16i8_v2i64(i8 %a0, <2 x i64> %a1) {
; SSE-LABEL: neg_scalar_broadcast_v16i8_v2i64:
; SSE:       # %bb.0:
; SSE-NEXT:    movd %edi, %xmm1
; SSE-NEXT:    pxor %xmm2, %xmm2
; SSE-NEXT:    pshufb %xmm2, %xmm1
; SSE-NEXT:    pandn %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: neg_scalar_broadcast_v16i8_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpandn %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: neg_scalar_broadcast_v16i8_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm1
; AVX2-NEXT:    vpbroadcastb %xmm1, %xmm1
; AVX2-NEXT:    vpandn %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: neg_scalar_broadcast_v16i8_v2i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastb %edi, %xmm1
; AVX512-NEXT:    vpandn %xmm0, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %1 = xor i8 %a0, -1
  %2 = insertelement <16 x i8> undef, i8 %1, i64 0
  %3 = shufflevector <16 x i8> %2, <16 x i8> poison, <16 x i32> zeroinitializer
  %4 = bitcast <16 x i8> %3 to <2 x i64>
  %5 = and <2 x i64> %4, %a1
  ret <2 x i64> %5
}

define <4 x i64> @neg_scalar_broadcast_v8i32_v4i64(i32 %a0, <4 x i64> %a1) {
; SSE-LABEL: neg_scalar_broadcast_v8i32_v4i64:
; SSE:       # %bb.0:
; SSE-NEXT:    notl %edi
; SSE-NEXT:    movd %edi, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    pand %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: neg_scalar_broadcast_v8i32_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT:    vandnps %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: neg_scalar_broadcast_v8i32_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm1
; AVX2-NEXT:    vpbroadcastd %xmm1, %ymm1
; AVX2-NEXT:    vpandn %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: neg_scalar_broadcast_v8i32_v4i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastd %edi, %ymm1
; AVX512-NEXT:    vpandn %ymm0, %ymm1, %ymm0
; AVX512-NEXT:    retq
  %1 = xor i32 %a0, -1
  %2 = insertelement <8 x i32> undef, i32 %1, i64 0
  %3 = shufflevector <8 x i32> %2, <8 x i32> poison, <8 x i32> zeroinitializer
  %4 = bitcast <8 x i32> %3 to <4 x i64>
  %5 = and <4 x i64> %4, %a1
  ret <4 x i64> %5
}

define <4 x i32> @neg_scalar_broadcast_two_uses(i32 %a0, <4 x i32> %a1, ptr %a2) {
; SSE-LABEL: neg_scalar_broadcast_two_uses:
; SSE:       # %bb.0:
; SSE-NEXT:    notl %edi
; SSE-NEXT:    movd %edi, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; SSE-NEXT:    movdqa %xmm1, (%rsi)
; SSE-NEXT:    pand %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: neg_scalar_broadcast_two_uses:
; AVX1:       # %bb.0:
; AVX1-NEXT:    notl %edi
; AVX1-NEXT:    vmovd %edi, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT:    vmovdqa %xmm1, (%rsi)
; AVX1-NEXT:    vpand %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: neg_scalar_broadcast_two_uses:
; AVX2:       # %bb.0:
; AVX2-NEXT:    notl %edi
; AVX2-NEXT:    vmovd %edi, %xmm1
; AVX2-NEXT:    vpbroadcastd %xmm1, %xmm1
; AVX2-NEXT:    vmovdqa %xmm1, (%rsi)
; AVX2-NEXT:    vpand %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: neg_scalar_broadcast_two_uses:
; AVX512:       # %bb.0:
; AVX512-NEXT:    notl %edi
; AVX512-NEXT:    vpbroadcastd %edi, %xmm1
; AVX512-NEXT:    vmovdqa %xmm1, (%rsi)
; AVX512-NEXT:    vpand %xmm0, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %1 = xor i32 %a0, -1
  %2 = insertelement <4 x i32> undef, i32 %1, i64 0
  %3 = shufflevector <4 x i32> %2, <4 x i32> poison, <4 x i32> zeroinitializer
  store <4 x i32> %3, ptr %a2, align 16
  %4 = and <4 x i32> %3, %a1
  ret <4 x i32> %4
}

; PR84660 - check for illegal types
define <2 x i128> @neg_scalar_broadcast_illegaltype(i128 %arg) {
; CHECK-LABEL: neg_scalar_broadcast_illegaltype:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rdi, %rax
; CHECK-NEXT:    notl %esi
; CHECK-NEXT:    andl $1, %esi
; CHECK-NEXT:    movq %rsi, 16(%rdi)
; CHECK-NEXT:    movq %rsi, (%rdi)
; CHECK-NEXT:    movq $0, 24(%rdi)
; CHECK-NEXT:    movq $0, 8(%rdi)
; CHECK-NEXT:    retq
  %i = xor i128 %arg, 1
  %i1 = insertelement <2 x i128> zeroinitializer, i128 %i, i64 0
  %i2 = shufflevector <2 x i128> %i1, <2 x i128> zeroinitializer, <2 x i32> zeroinitializer
  %i3 = and <2 x i128> <i128 1, i128 1>, %i2
  ret <2 x i128> %i3
}

define <2 x i64> @andnp_xx(<2 x i64> %v0) nounwind {
; SSE-LABEL: andnp_xx:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: andnp_xx:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = xor <2 x i64> %v0, <i64 -1, i64 -1>
  %y = and <2 x i64> %v0, %x
  ret <2 x i64> %y
}

define <2 x i64> @andnp_xx_2(<2 x i64> %v0) nounwind {
; SSE-LABEL: andnp_xx_2:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: andnp_xx_2:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = xor <2 x i64> %v0, <i64 -1, i64 -1>
  %y = and <2 x i64> %x, %v0
  ret <2 x i64> %y
}

define i64 @andn_xx(i64 %v0) nounwind {
; CHECK-LABEL: andn_xx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    retq
  %x = xor i64 %v0, -1
  %y = and i64 %v0, %x
  ret i64 %y
}

define i64 @andn_xx_2(i64 %v0) nounwind {
; CHECK-LABEL: andn_xx_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    retq
  %x = xor i64 %v0, -1
  %y = and i64 %x, %v0
  ret i64 %y
}