; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop | FileCheck %s --check-prefix=XOP
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VL

;
; 128-bit vectors
;

define <2 x i64> @bitselect_v2i64_rr(<2 x i64>, <2 x i64>) {
; SSE-LABEL: bitselect_v2i64_rr:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT:    orps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; XOP-LABEL: bitselect_v2i64_rr:
; XOP:       # %bb.0:
; XOP-NEXT:    vpcmov {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1, %xmm0
; XOP-NEXT:    retq
;
; AVX-LABEL: bitselect_v2i64_rr:
; AVX:       # %bb.0:
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX-NEXT:    vorps %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
;
; AVX512F-LABEL: bitselect_v2i64_rr:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vpmovsxbd {{.*#+}} xmm2 = [4294967295,4294967294,4294967293,4294967292]
; AVX512F-NEXT:    vpternlogq $216, %zmm2, %zmm1, %zmm0
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: bitselect_v2i64_rr:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
; AVX512VL-NEXT:    retq
  %3 = and <2 x i64> %0, <i64 4294967296, i64 12884901890>
  %4 = and <2 x i64> %1, <i64 -4294967297, i64 -12884901891>
  %5 = or <2 x i64> %4, %3
  ret <2 x i64> %5
}

define <2 x i64> @bitselect_v2i64_rm(<2 x i64>, ptr nocapture readonly) {
; SSE-LABEL: bitselect_v2i64_rm:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps (%rdi), %xmm1
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT:    orps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; XOP-LABEL: bitselect_v2i64_rm:
; XOP:       # %bb.0:
; XOP-NEXT:    vmovdqa (%rdi), %xmm1
; XOP-NEXT:    vpcmov {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1, %xmm0
; XOP-NEXT:    retq
;
; AVX-LABEL: bitselect_v2i64_rm:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps (%rdi), %xmm1
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX-NEXT:    vorps %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
;
; AVX512F-LABEL: bitselect_v2i64_rm:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vmovdqa (%rdi), %xmm1
; AVX512F-NEXT:    vpmovsxbd {{.*#+}} xmm2 = [4294967294,4294967293,4294967292,4294967295]
; AVX512F-NEXT:    vpternlogq $184, %zmm1, %zmm2, %zmm0
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: bitselect_v2i64_rm:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa (%rdi), %xmm1
; AVX512VL-NEXT:    vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
; AVX512VL-NEXT:    retq
  %3 = load <2 x i64>, ptr %1
  %4 = and <2 x i64> %0, <i64 8589934593, i64 3>
  %5 = and <2 x i64> %3, <i64 -8589934594, i64 -4>
  %6 = or <2 x i64> %5, %4
  ret <2 x i64> %6
}

define <2 x i64> @bitselect_v2i64_mr(ptr nocapture readonly, <2 x i64>) {
; SSE-LABEL: bitselect_v2i64_mr:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps (%rdi), %xmm1
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    orps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; XOP-LABEL: bitselect_v2i64_mr:
; XOP:       # %bb.0:
; XOP-NEXT:    vmovdqa (%rdi), %xmm1
; XOP-NEXT:    vpcmov {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1, %xmm0
; XOP-NEXT:    retq
;
; AVX-LABEL: bitselect_v2i64_mr:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps (%rdi), %xmm1
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vorps %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
;
; AVX512F-LABEL: bitselect_v2i64_mr:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vmovdqa (%rdi), %xmm1
; AVX512F-NEXT:    vpmovsxbd {{.*#+}} xmm2 = [2,3,0,1]
; AVX512F-NEXT:    vpternlogq $184, %zmm1, %zmm2, %zmm0
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: bitselect_v2i64_mr:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa (%rdi), %xmm1
; AVX512VL-NEXT:    vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
; AVX512VL-NEXT:    retq
  %3 = load <2 x i64>, ptr %0
  %4 = and <2 x i64> %3, <i64 12884901890, i64 4294967296>
  %5 = and <2 x i64> %1, <i64 -12884901891, i64 -4294967297>
  %6 = or <2 x i64> %4, %5
  ret <2 x i64> %6
}

define <2 x i64> @bitselect_v2i64_mm(ptr nocapture readonly, ptr nocapture readonly) {
; SSE-LABEL: bitselect_v2i64_mm:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps (%rdi), %xmm1
; SSE-NEXT:    movaps (%rsi), %xmm0
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    orps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; XOP-LABEL: bitselect_v2i64_mm:
; XOP:       # %bb.0:
; XOP-NEXT:    vmovdqa (%rsi), %xmm0
; XOP-NEXT:    vpmovsxbd {{.*#+}} xmm1 = [4294967292,4294967295,4294967294,4294967293]
; XOP-NEXT:    vpcmov %xmm1, (%rdi), %xmm0, %xmm0
; XOP-NEXT:    retq
;
; AVX-LABEL: bitselect_v2i64_mm:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps (%rdi), %xmm0
; AVX-NEXT:    vmovaps (%rsi), %xmm1
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX-NEXT:    vorps %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
;
; AVX512F-LABEL: bitselect_v2i64_mm:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqa (%rdi), %xmm1
; AVX512F-NEXT:    vmovdqa (%rsi), %xmm0
; AVX512F-NEXT:    vpmovsxbd {{.*#+}} xmm2 = [4294967292,4294967295,4294967294,4294967293]
; AVX512F-NEXT:    vpternlogq $226, %zmm1, %zmm2, %zmm0
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: bitselect_v2i64_mm:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa (%rsi), %xmm1
; AVX512VL-NEXT:    vpmovsxbd {{.*#+}} xmm0 = [4294967292,4294967295,4294967294,4294967293]
; AVX512VL-NEXT:    vpternlogq $202, (%rdi), %xmm1, %xmm0
; AVX512VL-NEXT:    retq
  %3 = load <2 x i64>, ptr %0
  %4 = load <2 x i64>, ptr %1
  %5 = and <2 x i64> %3, <i64 3, i64 8589934593>
  %6 = and <2 x i64> %4, <i64 -4, i64 -8589934594>
  %7 = or <2 x i64> %6, %5
  ret <2 x i64> %7
}

define <2 x i64> @bitselect_v2i64_broadcast_rrr(<2 x i64> %a0, <2 x i64> %a1, i64 %a2) {
; SSE-LABEL: bitselect_v2i64_broadcast_rrr:
; SSE:       # %bb.0:
; SSE-NEXT:    movq %rdi, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    pandn %xmm1, %xmm2
; SSE-NEXT:    por %xmm2, %xmm0
; SSE-NEXT:    retq
;
; XOP-LABEL: bitselect_v2i64_broadcast_rrr:
; XOP:       # %bb.0:
; XOP-NEXT:    vmovq %rdi, %xmm2
; XOP-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
; XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; XOP-NEXT:    retq
;
; AVX1-LABEL: bitselect_v2i64_broadcast_rrr:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovq %rdi, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpandn %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitselect_v2i64_broadcast_rrr:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovq %rdi, %xmm2
; AVX2-NEXT:    vpbroadcastq %xmm2, %xmm2
; AVX2-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpandn %xmm1, %xmm2, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: bitselect_v2i64_broadcast_rrr:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovq %rdi, %xmm2
; AVX512F-NEXT:    vpbroadcastq %xmm2, %xmm2
; AVX512F-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX512F-NEXT:    vpandn %xmm1, %xmm2, %xmm1
; AVX512F-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: bitselect_v2i64_broadcast_rrr:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpbroadcastq %rdi, %xmm2
; AVX512VL-NEXT:    vpternlogq $226, %xmm1, %xmm2, %xmm0
; AVX512VL-NEXT:    retq
  %1 = insertelement <2 x i64> undef, i64 %a2, i32 0
  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> zeroinitializer
  %3 = xor <2 x i64> %1, <i64 -1, i64 undef>
  %4 = shufflevector <2 x i64> %3, <2 x i64> undef, <2 x i32> zeroinitializer
  %5 = and <2 x i64> %a0, %2
  %6 = and <2 x i64> %a1, %4
  %7 = or <2 x i64> %5, %6
  ret <2 x i64> %7
}

define <2 x i64> @bitselect_v2i64_broadcast_rrm(<2 x i64> %a0, <2 x i64> %a1, ptr %p2) {
; SSE-LABEL: bitselect_v2i64_broadcast_rrm:
; SSE:       # %bb.0:
; SSE-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero
; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    pandn %xmm1, %xmm2
; SSE-NEXT:    por %xmm2, %xmm0
; SSE-NEXT:    retq
;
; XOP-LABEL: bitselect_v2i64_broadcast_rrm:
; XOP:       # %bb.0:
; XOP-NEXT:    vmovddup {{.*#+}} xmm2 = mem[0,0]
; XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; XOP-NEXT:    retq
;
; AVX-LABEL: bitselect_v2i64_broadcast_rrm:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovddup {{.*#+}} xmm2 = mem[0,0]
; AVX-NEXT:    vandps %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vandnps %xmm1, %xmm2, %xmm1
; AVX-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512F-LABEL: bitselect_v2i64_broadcast_rrm:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovddup {{.*#+}} xmm2 = mem[0,0]
; AVX512F-NEXT:    vandps %xmm2, %xmm0, %xmm0
; AVX512F-NEXT:    vandnps %xmm1, %xmm2, %xmm1
; AVX512F-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: bitselect_v2i64_broadcast_rrm:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpternlogq $228, (%rdi){1to2}, %xmm1, %xmm0
; AVX512VL-NEXT:    retq
  %a2 = load i64, ptr %p2
  %1 = insertelement <2 x i64> undef, i64 %a2, i32 0
  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> zeroinitializer
  %3 = xor <2 x i64> %1, <i64 -1, i64 undef>
  %4 = shufflevector <2 x i64> %3, <2 x i64> undef, <2 x i32> zeroinitializer
  %5 = and <2 x i64> %a0, %2
  %6 = and <2 x i64> %a1, %4
  %7 = or <2 x i64> %5, %6
  ret <2 x i64> %7
}

;
; 256-bit vectors
;

define <4 x i64> @bitselect_v4i64_rr(<4 x i64>, <4 x i64>) {
; SSE-LABEL: bitselect_v4i64_rr:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
; SSE-NEXT:    orps %xmm3, %xmm1
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE-NEXT:    orps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; XOP-LABEL: bitselect_v4i64_rr:
; XOP:       # %bb.0:
; XOP-NEXT:    vpcmov {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1, %ymm0
; XOP-NEXT:    retq
;
; AVX-LABEL: bitselect_v4i64_rr:
; AVX:       # %bb.0:
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX-NEXT:    vorps %ymm0, %ymm1, %ymm0
; AVX-NEXT:    retq
;
; AVX512F-LABEL: bitselect_v4i64_rr:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vpmovsxbd {{.*#+}} ymm2 = [4294967295,4294967294,4294967293,4294967292,4294967293,4294967292,4294967293,4294967292]
; AVX512F-NEXT:    vpternlogq $216, %zmm2, %zmm1, %zmm0
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: bitselect_v4i64_rr:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
; AVX512VL-NEXT:    retq
  %3 = and <4 x i64> %0, <i64 4294967296, i64 12884901890, i64 12884901890, i64 12884901890>
  %4 = and <4 x i64> %1, <i64 -4294967297, i64 -12884901891, i64 -12884901891, i64 -12884901891>
  %5 = or <4 x i64> %4, %3
  ret <4 x i64> %5
}

define <4 x i64> @bitselect_v4i64_rm(<4 x i64>, ptr nocapture readonly) {
; SSE-LABEL: bitselect_v4i64_rm:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm2 = [18446744065119617022,18446744073709551612]
; SSE-NEXT:    movaps 16(%rdi), %xmm4
; SSE-NEXT:    andps %xmm2, %xmm4
; SSE-NEXT:    movaps (%rdi), %xmm5
; SSE-NEXT:    andps %xmm2, %xmm5
; SSE-NEXT:    movaps %xmm2, %xmm3
; SSE-NEXT:    andnps %xmm0, %xmm3
; SSE-NEXT:    orps %xmm5, %xmm3
; SSE-NEXT:    andnps %xmm1, %xmm2
; SSE-NEXT:    orps %xmm4, %xmm2
; SSE-NEXT:    movaps %xmm3, %xmm0
; SSE-NEXT:    movaps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; XOP-LABEL: bitselect_v4i64_rm:
; XOP:       # %bb.0:
; XOP-NEXT:    vmovdqa (%rdi), %ymm1
; XOP-NEXT:    vpcmov {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1, %ymm0
; XOP-NEXT:    retq
;
; AVX-LABEL: bitselect_v4i64_rm:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps (%rdi), %ymm1
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX-NEXT:    vorps %ymm0, %ymm1, %ymm0
; AVX-NEXT:    retq
;
; AVX512F-LABEL: bitselect_v4i64_rm:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vmovdqa (%rdi), %ymm1
; AVX512F-NEXT:    vpmovsxbd {{.*#+}} ymm2 = [4294967294,4294967293,4294967292,4294967295,4294967294,4294967293,4294967292,4294967295]
; AVX512F-NEXT:    vpternlogq $184, %zmm1, %zmm2, %zmm0
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: bitselect_v4i64_rm:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa (%rdi), %ymm1
; AVX512VL-NEXT:    vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
; AVX512VL-NEXT:    retq
  %3 = load <4 x i64>, ptr %1
  %4 = and <4 x i64> %0, <i64 8589934593, i64 3, i64 8589934593, i64 3>
  %5 = and <4 x i64> %3, <i64 -8589934594, i64 -4, i64 -8589934594, i64 -4>
  %6 = or <4 x i64> %5, %4
  ret <4 x i64> %6
}

define <4 x i64> @bitselect_v4i64_mr(ptr nocapture readonly, <4 x i64>) {
; SSE-LABEL: bitselect_v4i64_mr:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm2 = [12884901890,4294967296]
; SSE-NEXT:    movaps 16(%rdi), %xmm4
; SSE-NEXT:    andps %xmm2, %xmm4
; SSE-NEXT:    movaps (%rdi), %xmm5
; SSE-NEXT:    andps %xmm2, %xmm5
; SSE-NEXT:    movaps %xmm2, %xmm3
; SSE-NEXT:    andnps %xmm0, %xmm3
; SSE-NEXT:    orps %xmm5, %xmm3
; SSE-NEXT:    andnps %xmm1, %xmm2
; SSE-NEXT:    orps %xmm4, %xmm2
; SSE-NEXT:    movaps %xmm3, %xmm0
; SSE-NEXT:    movaps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; XOP-LABEL: bitselect_v4i64_mr:
; XOP:       # %bb.0:
; XOP-NEXT:    vmovdqa (%rdi), %ymm1
; XOP-NEXT:    vpcmov {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1, %ymm0
; XOP-NEXT:    retq
;
; AVX-LABEL: bitselect_v4i64_mr:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps (%rdi), %ymm1
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX-NEXT:    vorps %ymm0, %ymm1, %ymm0
; AVX-NEXT:    retq
;
; AVX512F-LABEL: bitselect_v4i64_mr:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vmovdqa (%rdi), %ymm1
; AVX512F-NEXT:    vpmovsxbd {{.*#+}} ymm2 = [2,3,0,1,2,3,0,1]
; AVX512F-NEXT:    vpternlogq $184, %zmm1, %zmm2, %zmm0
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: bitselect_v4i64_mr:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa (%rdi), %ymm1
; AVX512VL-NEXT:    vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
; AVX512VL-NEXT:    retq
  %3 = load <4 x i64>, ptr %0
  %4 = and <4 x i64> %3, <i64 12884901890, i64 4294967296, i64 12884901890, i64 4294967296>
  %5 = and <4 x i64> %1, <i64 -12884901891, i64 -4294967297, i64 -12884901891, i64 -4294967297>
  %6 = or <4 x i64> %4, %5
  ret <4 x i64> %6
}

define <4 x i64> @bitselect_v4i64_mm(ptr nocapture readonly, ptr nocapture readonly) {
; SSE-LABEL: bitselect_v4i64_mm:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [18446744073709551612,18446744065119617022]
; SSE-NEXT:    movaps 16(%rsi), %xmm2
; SSE-NEXT:    andps %xmm1, %xmm2
; SSE-NEXT:    movaps (%rsi), %xmm3
; SSE-NEXT:    andps %xmm1, %xmm3
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    andnps (%rdi), %xmm0
; SSE-NEXT:    orps %xmm3, %xmm0
; SSE-NEXT:    andnps 16(%rdi), %xmm1
; SSE-NEXT:    orps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; XOP-LABEL: bitselect_v4i64_mm:
; XOP:       # %bb.0:
; XOP-NEXT:    vmovdqa (%rsi), %ymm0
; XOP-NEXT:    vbroadcastf128 {{.*#+}} ymm1 = [18446744073709551612,18446744065119617022,18446744073709551612,18446744065119617022]
; XOP-NEXT:    # ymm1 = mem[0,1,0,1]
; XOP-NEXT:    vpcmov %ymm1, (%rdi), %ymm0, %ymm0
; XOP-NEXT:    retq
;
; AVX-LABEL: bitselect_v4i64_mm:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps (%rdi), %ymm0
; AVX-NEXT:    vmovaps (%rsi), %ymm1
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX-NEXT:    vorps %ymm0, %ymm1, %ymm0
; AVX-NEXT:    retq
;
; AVX512F-LABEL: bitselect_v4i64_mm:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqa (%rdi), %ymm1
; AVX512F-NEXT:    vmovdqa (%rsi), %ymm0
; AVX512F-NEXT:    vpmovsxbd {{.*#+}} ymm2 = [4294967292,4294967295,4294967294,4294967293,4294967292,4294967295,4294967294,4294967293]
; AVX512F-NEXT:    vpternlogq $226, %zmm1, %zmm2, %zmm0
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: bitselect_v4i64_mm:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa (%rsi), %ymm1
; AVX512VL-NEXT:    vpmovsxbd {{.*#+}} ymm0 = [4294967292,4294967295,4294967294,4294967293,4294967292,4294967295,4294967294,4294967293]
; AVX512VL-NEXT:    vpternlogq $202, (%rdi), %ymm1, %ymm0
; AVX512VL-NEXT:    retq
  %3 = load <4 x i64>, ptr %0
  %4 = load <4 x i64>, ptr %1
  %5 = and <4 x i64> %3, <i64 3, i64 8589934593, i64 3, i64 8589934593>
  %6 = and <4 x i64> %4, <i64 -4, i64 -8589934594, i64 -4, i64 -8589934594>
  %7 = or <4 x i64> %6, %5
  ret <4 x i64> %7
}

define <4 x i64> @bitselect_v4i64_broadcast_rrr(<4 x i64> %a0, <4 x i64> %a1, i64 %a2) {
; SSE-LABEL: bitselect_v4i64_broadcast_rrr:
; SSE:       # %bb.0:
; SSE-NEXT:    movq %rdi, %xmm4
; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[0,1,0,1]
; SSE-NEXT:    pand %xmm4, %xmm1
; SSE-NEXT:    pand %xmm4, %xmm0
; SSE-NEXT:    movdqa %xmm4, %xmm5
; SSE-NEXT:    pandn %xmm3, %xmm5
; SSE-NEXT:    por %xmm5, %xmm1
; SSE-NEXT:    pandn %xmm2, %xmm4
; SSE-NEXT:    por %xmm4, %xmm0
; SSE-NEXT:    retq
;
; XOP-LABEL: bitselect_v4i64_broadcast_rrr:
; XOP:       # %bb.0:
; XOP-NEXT:    vmovq %rdi, %xmm2
; XOP-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
; XOP-NEXT:    vinsertf128 $1, %xmm2, %ymm2, %ymm2
; XOP-NEXT:    vpcmov %ymm2, %ymm1, %ymm0, %ymm0
; XOP-NEXT:    retq
;
; AVX1-LABEL: bitselect_v4i64_broadcast_rrr:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovq %rdi, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm2, %ymm2
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vandnps %ymm1, %ymm2, %ymm1
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitselect_v4i64_broadcast_rrr:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovq %rdi, %xmm2
; AVX2-NEXT:    vpbroadcastq %xmm2, %ymm2
; AVX2-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpandn %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: bitselect_v4i64_broadcast_rrr:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovq %rdi, %xmm2
; AVX512F-NEXT:    vpbroadcastq %xmm2, %ymm2
; AVX512F-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX512F-NEXT:    vpandn %ymm1, %ymm2, %ymm1
; AVX512F-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: bitselect_v4i64_broadcast_rrr:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpbroadcastq %rdi, %ymm2
; AVX512VL-NEXT:    vpternlogq $226, %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT:    retq
  %1 = insertelement <4 x i64> undef, i64 %a2, i32 0
  %2 = shufflevector <4 x i64> %1, <4 x i64> undef, <4 x i32> zeroinitializer
  %3 = xor <4 x i64> %1, <i64 -1, i64 undef, i64 undef, i64 undef>
  %4 = shufflevector <4 x i64> %3, <4 x i64> undef, <4 x i32> zeroinitializer
  %5 = and <4 x i64> %a0, %2
  %6 = and <4 x i64> %a1, %4
  %7 = or <4 x i64> %5, %6
  ret <4 x i64> %7
}

define <4 x i64> @bitselect_v4i64_broadcast_rrm(<4 x i64> %a0, <4 x i64> %a1, ptr %p2) {
; SSE-LABEL: bitselect_v4i64_broadcast_rrm:
; SSE:       # %bb.0:
; SSE-NEXT:    movq {{.*#+}} xmm4 = mem[0],zero
; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[0,1,0,1]
; SSE-NEXT:    pand %xmm4, %xmm1
; SSE-NEXT:    pand %xmm4, %xmm0
; SSE-NEXT:    movdqa %xmm4, %xmm5
; SSE-NEXT:    pandn %xmm3, %xmm5
; SSE-NEXT:    por %xmm5, %xmm1
; SSE-NEXT:    pandn %xmm2, %xmm4
; SSE-NEXT:    por %xmm4, %xmm0
; SSE-NEXT:    retq
;
; XOP-LABEL: bitselect_v4i64_broadcast_rrm:
; XOP:       # %bb.0:
; XOP-NEXT:    vbroadcastsd (%rdi), %ymm2
; XOP-NEXT:    vpcmov %ymm2, %ymm1, %ymm0, %ymm0
; XOP-NEXT:    retq
;
; AVX-LABEL: bitselect_v4i64_broadcast_rrm:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastsd (%rdi), %ymm2
; AVX-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX-NEXT:    vandnps %ymm1, %ymm2, %ymm1
; AVX-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512F-LABEL: bitselect_v4i64_broadcast_rrm:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vbroadcastsd (%rdi), %ymm2
; AVX512F-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX512F-NEXT:    vandnps %ymm1, %ymm2, %ymm1
; AVX512F-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: bitselect_v4i64_broadcast_rrm:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpternlogq $228, (%rdi){1to4}, %ymm1, %ymm0
; AVX512VL-NEXT:    retq
  %a2 = load i64, ptr %p2
  %1 = insertelement <4 x i64> undef, i64 %a2, i32 0
  %2 = shufflevector <4 x i64> %1, <4 x i64> undef, <4 x i32> zeroinitializer
  %3 = xor <4 x i64> %1, <i64 -1, i64 undef, i64 undef, i64 undef>
  %4 = shufflevector <4 x i64> %3, <4 x i64> undef, <4 x i32> zeroinitializer
  %5 = and <4 x i64> %a0, %2
  %6 = and <4 x i64> %a1, %4
  %7 = or <4 x i64> %5, %6
  ret <4 x i64> %7
}

;
; 512-bit vectors
;

define <8 x i64> @bitselect_v8i64_rr(<8 x i64>, <8 x i64>) {
; SSE-LABEL: bitselect_v8i64_rr:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm8 = [18446744060824649725,18446744060824649725]
; SSE-NEXT:    andps %xmm8, %xmm7
; SSE-NEXT:    movaps {{.*#+}} xmm9 = [18446744069414584319,18446744060824649725]
; SSE-NEXT:    andps %xmm9, %xmm6
; SSE-NEXT:    andps %xmm8, %xmm5
; SSE-NEXT:    andps %xmm9, %xmm4
; SSE-NEXT:    movaps %xmm9, %xmm10
; SSE-NEXT:    andnps %xmm0, %xmm10
; SSE-NEXT:    orps %xmm4, %xmm10
; SSE-NEXT:    movaps %xmm8, %xmm4
; SSE-NEXT:    andnps %xmm1, %xmm4
; SSE-NEXT:    orps %xmm5, %xmm4
; SSE-NEXT:    andnps %xmm2, %xmm9
; SSE-NEXT:    orps %xmm6, %xmm9
; SSE-NEXT:    andnps %xmm3, %xmm8
; SSE-NEXT:    orps %xmm7, %xmm8
; SSE-NEXT:    movaps %xmm10, %xmm0
; SSE-NEXT:    movaps %xmm4, %xmm1
; SSE-NEXT:    movaps %xmm9, %xmm2
; SSE-NEXT:    movaps %xmm8, %xmm3
; SSE-NEXT:    retq
;
; XOP-LABEL: bitselect_v8i64_rr:
; XOP:       # %bb.0:
; XOP-NEXT:    vmovdqa {{.*#+}} ymm4 = [18446744069414584319,18446744060824649725,18446744060824649725,18446744060824649725]
; XOP-NEXT:    vpcmov %ymm4, %ymm0, %ymm2, %ymm0
; XOP-NEXT:    vpcmov %ymm4, %ymm1, %ymm3, %ymm1
; XOP-NEXT:    retq
;
; AVX-LABEL: bitselect_v8i64_rr:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm4 = [18446744069414584319,18446744060824649725,18446744060824649725,18446744060824649725]
; AVX-NEXT:    vandps %ymm4, %ymm3, %ymm3
; AVX-NEXT:    vandps %ymm4, %ymm2, %ymm2
; AVX-NEXT:    vandnps %ymm0, %ymm4, %ymm0
; AVX-NEXT:    vorps %ymm0, %ymm2, %ymm0
; AVX-NEXT:    vandnps %ymm1, %ymm4, %ymm1
; AVX-NEXT:    vorps %ymm1, %ymm3, %ymm1
; AVX-NEXT:    retq
;
; AVX512-LABEL: bitselect_v8i64_rr:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
; AVX512-NEXT:    retq
  %3 = and <8 x i64> %0, <i64 4294967296, i64 12884901890, i64 12884901890, i64 12884901890, i64 4294967296, i64 12884901890, i64 12884901890, i64 12884901890>
  %4 = and <8 x i64> %1, <i64 -4294967297, i64 -12884901891, i64 -12884901891, i64 -12884901891, i64 -4294967297, i64 -12884901891, i64 -12884901891, i64 -12884901891>
  %5 = or <8 x i64> %4, %3
  ret <8 x i64> %5
}

define <8 x i64> @bitselect_v8i64_rm(<8 x i64>, ptr nocapture readonly) {
; SSE-LABEL: bitselect_v8i64_rm:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm4 = [18446744065119617022,18446744073709551612]
; SSE-NEXT:    movaps 48(%rdi), %xmm8
; SSE-NEXT:    andps %xmm4, %xmm8
; SSE-NEXT:    movaps 32(%rdi), %xmm9
; SSE-NEXT:    andps %xmm4, %xmm9
; SSE-NEXT:    movaps 16(%rdi), %xmm7
; SSE-NEXT:    andps %xmm4, %xmm7
; SSE-NEXT:    movaps (%rdi), %xmm6
; SSE-NEXT:    andps %xmm4, %xmm6
; SSE-NEXT:    movaps %xmm4, %xmm5
; SSE-NEXT:    andnps %xmm0, %xmm5
; SSE-NEXT:    orps %xmm6, %xmm5
; SSE-NEXT:    movaps %xmm4, %xmm6
; SSE-NEXT:    andnps %xmm1, %xmm6
; SSE-NEXT:    orps %xmm7, %xmm6
; SSE-NEXT:    movaps %xmm4, %xmm7
; SSE-NEXT:    andnps %xmm2, %xmm7
; SSE-NEXT:    orps %xmm9, %xmm7
; SSE-NEXT:    andnps %xmm3, %xmm4
; SSE-NEXT:    orps %xmm8, %xmm4
; SSE-NEXT:    movaps %xmm5, %xmm0
; SSE-NEXT:    movaps %xmm6, %xmm1
; SSE-NEXT:    movaps %xmm7, %xmm2
; SSE-NEXT:    movaps %xmm4, %xmm3
; SSE-NEXT:    retq
;
; XOP-LABEL: bitselect_v8i64_rm:
; XOP:       # %bb.0:
; XOP-NEXT:    vmovdqa (%rdi), %ymm2
; XOP-NEXT:    vmovdqa 32(%rdi), %ymm3
; XOP-NEXT:    vbroadcastf128 {{.*#+}} ymm4 = [18446744065119617022,18446744073709551612,18446744065119617022,18446744073709551612]
; XOP-NEXT:    # ymm4 = mem[0,1,0,1]
; XOP-NEXT:    vpcmov %ymm4, %ymm0, %ymm2, %ymm0
; XOP-NEXT:    vpcmov %ymm4, %ymm1, %ymm3, %ymm1
; XOP-NEXT:    retq
;
; AVX-LABEL: bitselect_v8i64_rm:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastf128 {{.*#+}} ymm2 = [18446744065119617022,18446744073709551612,18446744065119617022,18446744073709551612]
; AVX-NEXT:    # ymm2 = mem[0,1,0,1]
; AVX-NEXT:    vandps 32(%rdi), %ymm2, %ymm3
; AVX-NEXT:    vandps (%rdi), %ymm2, %ymm4
; AVX-NEXT:    vandnps %ymm0, %ymm2, %ymm0
; AVX-NEXT:    vorps %ymm0, %ymm4, %ymm0
; AVX-NEXT:    vandnps %ymm1, %ymm2, %ymm1
; AVX-NEXT:    vorps %ymm1, %ymm3, %ymm1
; AVX-NEXT:    retq
;
; AVX512-LABEL: bitselect_v8i64_rm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa64 (%rdi), %zmm1
; AVX512-NEXT:    vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
; AVX512-NEXT:    retq
  %3 = load <8 x i64>, ptr %1
  %4 = and <8 x i64> %0, <i64 8589934593, i64 3, i64 8589934593, i64 3, i64 8589934593, i64 3, i64 8589934593, i64 3>
  %5 = and <8 x i64> %3, <i64 -8589934594, i64 -4, i64 -8589934594, i64 -4, i64 -8589934594, i64 -4, i64 -8589934594, i64 -4>
  %6 = or <8 x i64> %5, %4
  ret <8 x i64> %6
}

define <8 x i64> @bitselect_v8i64_mr(ptr nocapture readonly, <8 x i64>) {
; SSE-LABEL: bitselect_v8i64_mr:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm4 = [12884901890,4294967296]
; SSE-NEXT:    movaps 48(%rdi), %xmm8
; SSE-NEXT:    andps %xmm4, %xmm8
; SSE-NEXT:    movaps 32(%rdi), %xmm9
; SSE-NEXT:    andps %xmm4, %xmm9
; SSE-NEXT:    movaps 16(%rdi), %xmm7
; SSE-NEXT:    andps %xmm4, %xmm7
; SSE-NEXT:    movaps (%rdi), %xmm6
; SSE-NEXT:    andps %xmm4, %xmm6
; SSE-NEXT:    movaps %xmm4, %xmm5
; SSE-NEXT:    andnps %xmm0, %xmm5
; SSE-NEXT:    orps %xmm6, %xmm5
; SSE-NEXT:    movaps %xmm4, %xmm6
; SSE-NEXT:    andnps %xmm1, %xmm6
; SSE-NEXT:    orps %xmm7, %xmm6
; SSE-NEXT:    movaps %xmm4, %xmm7
; SSE-NEXT:    andnps %xmm2, %xmm7
; SSE-NEXT:    orps %xmm9, %xmm7
; SSE-NEXT:    andnps %xmm3, %xmm4
; SSE-NEXT:    orps %xmm8, %xmm4
; SSE-NEXT:    movaps %xmm5, %xmm0
; SSE-NEXT:    movaps %xmm6, %xmm1
; SSE-NEXT:    movaps %xmm7, %xmm2
; SSE-NEXT:    movaps %xmm4, %xmm3
; SSE-NEXT:    retq
;
; XOP-LABEL: bitselect_v8i64_mr:
; XOP:       # %bb.0:
; XOP-NEXT:    vmovdqa (%rdi), %ymm2
; XOP-NEXT:    vmovdqa 32(%rdi), %ymm3
; XOP-NEXT:    vbroadcastf128 {{.*#+}} ymm4 = [12884901890,4294967296,12884901890,4294967296]
; XOP-NEXT:    # ymm4 = mem[0,1,0,1]
; XOP-NEXT:    vpcmov %ymm4, %ymm0, %ymm2, %ymm0
; XOP-NEXT:    vpcmov %ymm4, %ymm1, %ymm3, %ymm1
; XOP-NEXT:    retq
;
; AVX-LABEL: bitselect_v8i64_mr:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastf128 {{.*#+}} ymm2 = [12884901890,4294967296,12884901890,4294967296]
; AVX-NEXT:    # ymm2 = mem[0,1,0,1]
; AVX-NEXT:    vandps 32(%rdi), %ymm2, %ymm3
; AVX-NEXT:    vandps (%rdi), %ymm2, %ymm4
; AVX-NEXT:    vandnps %ymm0, %ymm2, %ymm0
; AVX-NEXT:    vorps %ymm0, %ymm4, %ymm0
; AVX-NEXT:    vandnps %ymm1, %ymm2, %ymm1
; AVX-NEXT:    vorps %ymm1, %ymm3, %ymm1
; AVX-NEXT:    retq
;
; AVX512-LABEL: bitselect_v8i64_mr:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa64 (%rdi), %zmm1
; AVX512-NEXT:    vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
; AVX512-NEXT:    retq
  %3 = load <8 x i64>, ptr %0
  %4 = and <8 x i64> %3, <i64 12884901890, i64 4294967296, i64 12884901890, i64 4294967296, i64 12884901890, i64 4294967296, i64 12884901890, i64 4294967296>
  %5 = and <8 x i64> %1, <i64 -12884901891, i64 -4294967297, i64 -12884901891, i64 -4294967297, i64 -12884901891, i64 -4294967297, i64 -12884901891, i64 -4294967297>
  %6 = or <8 x i64> %4, %5
  ret <8 x i64> %6
}

define <8 x i64> @bitselect_v8i64_mm(ptr nocapture readonly, ptr nocapture readonly) {
; SSE-LABEL: bitselect_v8i64_mm:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm3 = [18446744073709551612,18446744065119617022]
; SSE-NEXT:    movaps 48(%rsi), %xmm4
; SSE-NEXT:    andps %xmm3, %xmm4
; SSE-NEXT:    movaps 32(%rsi), %xmm5
; SSE-NEXT:    andps %xmm3, %xmm5
; SSE-NEXT:    movaps 16(%rsi), %xmm2
; SSE-NEXT:    andps %xmm3, %xmm2
; SSE-NEXT:    movaps (%rsi), %xmm1
; SSE-NEXT:    andps %xmm3, %xmm1
; SSE-NEXT:    movaps %xmm3, %xmm0
; SSE-NEXT:    andnps (%rdi), %xmm0
; SSE-NEXT:    orps %xmm1, %xmm0
; SSE-NEXT:    movaps %xmm3, %xmm1
; SSE-NEXT:    andnps 16(%rdi), %xmm1
; SSE-NEXT:    orps %xmm2, %xmm1
; SSE-NEXT:    movaps %xmm3, %xmm2
; SSE-NEXT:    andnps 32(%rdi), %xmm2
; SSE-NEXT:    orps %xmm5, %xmm2
; SSE-NEXT:    andnps 48(%rdi), %xmm3
; SSE-NEXT:    orps %xmm4, %xmm3
; SSE-NEXT:    retq
;
; XOP-LABEL: bitselect_v8i64_mm:
; XOP:       # %bb.0:
; XOP-NEXT:    vmovdqa (%rsi), %ymm0
; XOP-NEXT:    vmovdqa 32(%rsi), %ymm1
; XOP-NEXT:    vbroadcastf128 {{.*#+}} ymm2 = [18446744073709551612,18446744065119617022,18446744073709551612,18446744065119617022]
; XOP-NEXT:    # ymm2 = mem[0,1,0,1]
; XOP-NEXT:    vpcmov %ymm2, (%rdi), %ymm0, %ymm0
; XOP-NEXT:    vpcmov %ymm2, 32(%rdi), %ymm1, %ymm1
; XOP-NEXT:    retq
;
; AVX-LABEL: bitselect_v8i64_mm:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastf128 {{.*#+}} ymm1 = [18446744073709551612,18446744065119617022,18446744073709551612,18446744065119617022]
; AVX-NEXT:    # ymm1 = mem[0,1,0,1]
; AVX-NEXT:    vandps 32(%rsi), %ymm1, %ymm2
; AVX-NEXT:    vandps (%rsi), %ymm1, %ymm0
; AVX-NEXT:    vandnps (%rdi), %ymm1, %ymm3
; AVX-NEXT:    vorps %ymm3, %ymm0, %ymm0
; AVX-NEXT:    vandnps 32(%rdi), %ymm1, %ymm1
; AVX-NEXT:    vorps %ymm1, %ymm2, %ymm1
; AVX-NEXT:    retq
;
; AVX512-LABEL: bitselect_v8i64_mm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqa64 (%rsi), %zmm1
; AVX512-NEXT:    vbroadcasti32x4 {{.*#+}} zmm0 = [18446744073709551612,18446744065119617022,18446744073709551612,18446744065119617022,18446744073709551612,18446744065119617022,18446744073709551612,18446744065119617022]
; AVX512-NEXT:    # zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512-NEXT:    vpternlogq $202, (%rdi), %zmm1, %zmm0
; AVX512-NEXT:    retq
  %3 = load <8 x i64>, ptr %0
  %4 = load <8 x i64>, ptr %1
  %5 = and <8 x i64> %3, <i64 3, i64 8589934593, i64 3, i64 8589934593, i64 3, i64 8589934593, i64 3, i64 8589934593>
  %6 = and <8 x i64> %4, <i64 -4, i64 -8589934594, i64 -4, i64 -8589934594, i64 -4, i64 -8589934594, i64 -4, i64 -8589934594>
  %7 = or <8 x i64> %6, %5
  ret <8 x i64> %7
}

define <8 x i64> @bitselect_v8i64_broadcast_rrr(<8 x i64> %a0, <8 x i64> %a1, i64 %a2) {
; SSE-LABEL: bitselect_v8i64_broadcast_rrr:
; SSE:       # %bb.0:
; SSE-NEXT:    movq %rdi, %xmm8
; SSE-NEXT:    pshufd {{.*#+}} xmm8 = xmm8[0,1,0,1]
; SSE-NEXT:    pand %xmm8, %xmm3
; SSE-NEXT:    pand %xmm8, %xmm2
; SSE-NEXT:    pand %xmm8, %xmm1
; SSE-NEXT:    pand %xmm8, %xmm0
; SSE-NEXT:    movdqa %xmm8, %xmm9
; SSE-NEXT:    pandn %xmm7, %xmm9
; SSE-NEXT:    por %xmm9, %xmm3
; SSE-NEXT:    movdqa %xmm8, %xmm7
; SSE-NEXT:    pandn %xmm6, %xmm7
; SSE-NEXT:    por %xmm7, %xmm2
; SSE-NEXT:    movdqa %xmm8, %xmm6
; SSE-NEXT:    pandn %xmm5, %xmm6
; SSE-NEXT:    por %xmm6, %xmm1
; SSE-NEXT:    pandn %xmm4, %xmm8
; SSE-NEXT:    por %xmm8, %xmm0
; SSE-NEXT:    retq
;
; XOP-LABEL: bitselect_v8i64_broadcast_rrr:
; XOP:       # %bb.0:
; XOP-NEXT:    vmovq %rdi, %xmm4
; XOP-NEXT:    vpshufd {{.*#+}} xmm4 = xmm4[0,1,0,1]
; XOP-NEXT:    vinsertf128 $1, %xmm4, %ymm4, %ymm4
; XOP-NEXT:    vpcmov %ymm4, %ymm2, %ymm0, %ymm0
; XOP-NEXT:    vpcmov %ymm4, %ymm3, %ymm1, %ymm1
; XOP-NEXT:    retq
;
; AVX1-LABEL: bitselect_v8i64_broadcast_rrr:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovq %rdi, %xmm4
; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm4[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm4, %ymm4
; AVX1-NEXT:    vandps %ymm4, %ymm1, %ymm1
; AVX1-NEXT:    vandps %ymm4, %ymm0, %ymm0
; AVX1-NEXT:    vandnps %ymm3, %ymm4, %ymm3
; AVX1-NEXT:    vorps %ymm3, %ymm1, %ymm1
; AVX1-NEXT:    vandnps %ymm2, %ymm4, %ymm2
; AVX1-NEXT:    vorps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitselect_v8i64_broadcast_rrr:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovq %rdi, %xmm4
; AVX2-NEXT:    vpbroadcastq %xmm4, %ymm4
; AVX2-NEXT:    vpand %ymm4, %ymm1, %ymm1
; AVX2-NEXT:    vpand %ymm4, %ymm0, %ymm0
; AVX2-NEXT:    vpandn %ymm3, %ymm4, %ymm3
; AVX2-NEXT:    vpor %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vpandn %ymm2, %ymm4, %ymm2
; AVX2-NEXT:    vpor %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: bitselect_v8i64_broadcast_rrr:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastq %rdi, %zmm2
; AVX512-NEXT:    vpternlogq $226, %zmm1, %zmm2, %zmm0
; AVX512-NEXT:    retq
  %1 = insertelement <8 x i64> undef, i64 %a2, i32 0
  %2 = shufflevector <8 x i64> %1, <8 x i64> undef, <8 x i32> zeroinitializer
  %3 = xor <8 x i64> %1, <i64 -1, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>
  %4 = shufflevector <8 x i64> %3, <8 x i64> undef, <8 x i32> zeroinitializer
  %5 = and <8 x i64> %a0, %2
  %6 = and <8 x i64> %a1, %4
  %7 = or <8 x i64> %5, %6
  ret <8 x i64> %7
}

define <8 x i64> @bitselect_v8i64_broadcast_rrm(<8 x i64> %a0, <8 x i64> %a1, ptr %p2) {
; SSE-LABEL: bitselect_v8i64_broadcast_rrm:
; SSE:       # %bb.0:
; SSE-NEXT:    movq {{.*#+}} xmm8 = mem[0],zero
; SSE-NEXT:    pshufd {{.*#+}} xmm8 = xmm8[0,1,0,1]
; SSE-NEXT:    pand %xmm8, %xmm3
; SSE-NEXT:    pand %xmm8, %xmm2
; SSE-NEXT:    pand %xmm8, %xmm1
; SSE-NEXT:    pand %xmm8, %xmm0
; SSE-NEXT:    movdqa %xmm8, %xmm9
; SSE-NEXT:    pandn %xmm7, %xmm9
; SSE-NEXT:    por %xmm9, %xmm3
; SSE-NEXT:    movdqa %xmm8, %xmm7
; SSE-NEXT:    pandn %xmm6, %xmm7
; SSE-NEXT:    por %xmm7, %xmm2
; SSE-NEXT:    movdqa %xmm8, %xmm6
; SSE-NEXT:    pandn %xmm5, %xmm6
; SSE-NEXT:    por %xmm6, %xmm1
; SSE-NEXT:    pandn %xmm4, %xmm8
; SSE-NEXT:    por %xmm8, %xmm0
; SSE-NEXT:    retq
;
; XOP-LABEL: bitselect_v8i64_broadcast_rrm:
; XOP:       # %bb.0:
; XOP-NEXT:    vbroadcastsd (%rdi), %ymm4
; XOP-NEXT:    vpcmov %ymm4, %ymm2, %ymm0, %ymm0
; XOP-NEXT:    vpcmov %ymm4, %ymm3, %ymm1, %ymm1
; XOP-NEXT:    retq
;
; AVX-LABEL: bitselect_v8i64_broadcast_rrm:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastsd (%rdi), %ymm4
; AVX-NEXT:    vandps %ymm4, %ymm1, %ymm1
; AVX-NEXT:    vandps %ymm4, %ymm0, %ymm0
; AVX-NEXT:    vandnps %ymm3, %ymm4, %ymm3
; AVX-NEXT:    vorps %ymm3, %ymm1, %ymm1
; AVX-NEXT:    vandnps %ymm2, %ymm4, %ymm2
; AVX-NEXT:    vorps %ymm2, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: bitselect_v8i64_broadcast_rrm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpternlogq $228, (%rdi){1to8}, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %a2 = load i64, ptr %p2
  %1 = insertelement <8 x i64> undef, i64 %a2, i32 0
  %2 = shufflevector <8 x i64> %1, <8 x i64> undef, <8 x i32> zeroinitializer
  %3 = xor <8 x i64> %1, <i64 -1, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>
  %4 = shufflevector <8 x i64> %3, <8 x i64> undef, <8 x i32> zeroinitializer
  %5 = and <8 x i64> %a0, %2
  %6 = and <8 x i64> %a1, %4
  %7 = or <8 x i64> %5, %6
  ret <8 x i64> %7
}

; Check that mask registers don't get canonicalized.
define <4 x i1> @bitselect_v4i1_loop(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: bitselect_v4i1_loop:
; SSE:       # %bb.0: # %bb
; SSE-NEXT:    pxor %xmm2, %xmm2
; SSE-NEXT:    pcmpeqd %xmm2, %xmm0
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [12,12,12,12]
; SSE-NEXT:    pcmpeqd %xmm1, %xmm2
; SSE-NEXT:    pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    pandn %xmm2, %xmm0
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    retq
;
; XOP-LABEL: bitselect_v4i1_loop:
; XOP:       # %bb.0: # %bb
; XOP-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; XOP-NEXT:    vpcomneqd %xmm2, %xmm0, %xmm0
; XOP-NEXT:    vpcomeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2
; XOP-NEXT:    vpcomeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOP-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; XOP-NEXT:    retq
;
; AVX1-LABEL: bitselect_v4i1_loop:
; AVX1:       # %bb.0: # %bb
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2
; AVX1-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vblendvps %xmm0, %xmm1, %xmm2, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitselect_v4i1_loop:
; AVX2:       # %bb.0: # %bb
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [12,12,12,12]
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm2
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm3 = [15,15,15,15]
; AVX2-NEXT:    vpcmpeqd %xmm3, %xmm1, %xmm1
; AVX2-NEXT:    vblendvps %xmm0, %xmm1, %xmm2, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: bitselect_v4i1_loop:
; AVX512F:       # %bb.0: # %bb
; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %k1
; AVX512F-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %k2
; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k0 {%k2}
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1 {%k1}
; AVX512F-NEXT:    korw %k0, %k1, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: bitselect_v4i1_loop:
; AVX512VL:       # %bb.0: # %bb
; AVX512VL-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %k1
; AVX512VL-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %k2
; AVX512VL-NEXT:    vptestnmd %xmm0, %xmm0, %k0 {%k2}
; AVX512VL-NEXT:    vptestmd %xmm0, %xmm0, %k1 {%k1}
; AVX512VL-NEXT:    korw %k0, %k1, %k1
; AVX512VL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VL-NEXT:    retq
bb:
  %tmp = icmp ne <4 x i32> %a0, zeroinitializer
  %tmp2 = icmp eq <4 x i32> %a1, <i32 12, i32 12, i32 12, i32 12>
  %tmp3 = icmp eq <4 x i32> %a1, <i32 15, i32 15, i32 15, i32 15>
  %tmp4 = select <4 x i1> %tmp, <4 x i1> %tmp2, <4 x i1> %tmp3
  ret <4 x i1> %tmp4
}

; Regression reported on 057db2002bb3d79429db3c5fe436c8cefc50cb25
@d = external global <2 x i64>, align 16
define void @constantfold_andn_mask() nounwind {
; SSE-LABEL: constantfold_andn_mask:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pushq %rax
; SSE-NEXT:    callq use@PLT
; SSE-NEXT:    movdqu (%rax), %xmm1
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [31,248,31,248,31,248,31,248,31,248,31,248,31,248,31,248]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    pavgb %xmm2, %xmm0
; SSE-NEXT:    pandn %xmm1, %xmm0
; SSE-NEXT:    pand %xmm2, %xmm1
; SSE-NEXT:    pandn %xmm0, %xmm2
; SSE-NEXT:    por %xmm1, %xmm2
; SSE-NEXT:    movabsq $87960930222080, %rax # imm = 0x500000000000
; SSE-NEXT:    xorq d@GOTPCREL(%rip), %rax
; SSE-NEXT:    movdqa %xmm2, (%rax)
; SSE-NEXT:    popq %rax
; SSE-NEXT:    retq
;
; XOP-LABEL: constantfold_andn_mask:
; XOP:       # %bb.0: # %entry
; XOP-NEXT:    pushq %rax
; XOP-NEXT:    callq use@PLT
; XOP-NEXT:    vmovdqu (%rax), %xmm1
; XOP-NEXT:    vbroadcastss {{.*#+}} xmm2 = [31,248,31,248,31,248,31,248,31,248,31,248,31,248,31,248]
; XOP-NEXT:    vpand %xmm2, %xmm1, %xmm3
; XOP-NEXT:    vpand %xmm2, %xmm0, %xmm0
; XOP-NEXT:    vpavgb %xmm2, %xmm0, %xmm0
; XOP-NEXT:    vpandn %xmm1, %xmm0, %xmm0
; XOP-NEXT:    vpandn %xmm0, %xmm2, %xmm0
; XOP-NEXT:    vpor %xmm0, %xmm3, %xmm0
; XOP-NEXT:    movabsq $87960930222080, %rax # imm = 0x500000000000
; XOP-NEXT:    xorq d@GOTPCREL(%rip), %rax
; XOP-NEXT:    vmovdqa %xmm0, (%rax)
; XOP-NEXT:    popq %rax
; XOP-NEXT:    retq
;
; AVX1-LABEL: constantfold_andn_mask:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    pushq %rax
; AVX1-NEXT:    callq use@PLT
; AVX1-NEXT:    vmovdqu (%rax), %xmm1
; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm2 = [31,248,31,248,31,248,31,248,31,248,31,248,31,248,31,248]
; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm3
; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpavgb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpandn %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpandn %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vpor %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    movabsq $87960930222080, %rax # imm = 0x500000000000
; AVX1-NEXT:    xorq d@GOTPCREL(%rip), %rax
; AVX1-NEXT:    vmovdqa %xmm0, (%rax)
; AVX1-NEXT:    popq %rax
; AVX1-NEXT:    retq
;
; AVX2-LABEL: constantfold_andn_mask:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    pushq %rax
; AVX2-NEXT:    callq use@PLT
; AVX2-NEXT:    vmovdqu (%rax), %xmm1
; AVX2-NEXT:    vpbroadcastw {{.*#+}} xmm2 = [31,248,31,248,31,248,31,248,31,248,31,248,31,248,31,248]
; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm3
; AVX2-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpavgb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpandn %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpandn %xmm0, %xmm2, %xmm0
; AVX2-NEXT:    vpor %xmm0, %xmm3, %xmm0
; AVX2-NEXT:    movabsq $87960930222080, %rax # imm = 0x500000000000
; AVX2-NEXT:    xorq d@GOTPCREL(%rip), %rax
; AVX2-NEXT:    vmovdqa %xmm0, (%rax)
; AVX2-NEXT:    popq %rax
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: constantfold_andn_mask:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    pushq %rax
; AVX512F-NEXT:    callq use@PLT
; AVX512F-NEXT:    vmovdqu (%rax), %xmm1
; AVX512F-NEXT:    vpbroadcastw {{.*#+}} xmm2 = [31,248,31,248,31,248,31,248,31,248,31,248,31,248,31,248]
; AVX512F-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX512F-NEXT:    vpavgb %xmm2, %xmm0, %xmm0
; AVX512F-NEXT:    vpandn %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    vpternlogq $184, %zmm1, %zmm2, %zmm0
; AVX512F-NEXT:    movabsq $87960930222080, %rax # imm = 0x500000000000
; AVX512F-NEXT:    xorq d@GOTPCREL(%rip), %rax
; AVX512F-NEXT:    vmovdqa %xmm0, (%rax)
; AVX512F-NEXT:    popq %rax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: constantfold_andn_mask:
; AVX512VL:       # %bb.0: # %entry
; AVX512VL-NEXT:    pushq %rax
; AVX512VL-NEXT:    callq use@PLT
; AVX512VL-NEXT:    vmovdqu (%rax), %xmm1
; AVX512VL-NEXT:    vpbroadcastw {{.*#+}} xmm2 = [31,248,31,248,31,248,31,248,31,248,31,248,31,248,31,248]
; AVX512VL-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX512VL-NEXT:    vpavgb %xmm2, %xmm0, %xmm0
; AVX512VL-NEXT:    vpandn %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    vpternlogq $216, %xmm2, %xmm1, %xmm0
; AVX512VL-NEXT:    movabsq $87960930222080, %rax # imm = 0x500000000000
; AVX512VL-NEXT:    xorq d@GOTPCREL(%rip), %rax
; AVX512VL-NEXT:    vmovdqa %xmm0, (%rax)
; AVX512VL-NEXT:    popq %rax
; AVX512VL-NEXT:    retq
entry:
  %call = call noundef <2 x i64> @use()
  %_msret = load <2 x i64>, ptr undef, align 8
  %i = bitcast <2 x i64> %_msret to <16 x i8>
  %i1 = bitcast <2 x i64> %call to <16 x i8>
  %i2 = and <16 x i8> %i, <i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8>
  %i3 = and <16 x i8> %i1, <i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8>
  %i4 = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> <i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8>, <16 x i8> %i3)
  %i5 = bitcast <16 x i8> %i2 to <2 x i64>
  %i6 = bitcast <16 x i8> %i4 to <2 x i64>
  %i7 = and <2 x i64> %_msret, <i64 567462211834873824, i64 567462211834873824>
  %i8 = xor <2 x i64> zeroinitializer, <i64 -1, i64 -1>
  %i9 = xor <2 x i64> %i6, <i64 -1, i64 -1>
  %i10 = and <2 x i64> %i8, %i5
  %i11 = and <2 x i64> %i7, %i9
  %i12 = or <2 x i64> zeroinitializer, %i10
  %i13 = or <2 x i64> %i12, %i11
  store <2 x i64> %i13, ptr inttoptr (i64 xor (i64 ptrtoint (ptr @d to i64), i64 87960930222080) to ptr), align 16
  ret void
}

declare <2 x i64> @use()
declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>)