; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX12,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s --check-prefixes=CHECK,AVX,AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl | FileCheck %s --check-prefixes=CHECK,AVX,AVX512,AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=xop | FileCheck %s --check-prefixes=CHECK,XOP

; The condition vector for BLENDV* only cares about the sign bit of each element.
; So in these tests, if we generate BLENDV*, we should be able to remove the redundant cmp op.

; Test 128-bit vectors for all legal element types.

; i8 has no vector compare against a sign-extended mask that beats blendv,
; so all subtargets fold the (icmp slt, 0) into the vpblendvb condition.
define <16 x i8> @signbit_sel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) {
; CHECK-LABEL: signbit_sel_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %tr = icmp slt <16 x i8> %mask, zeroinitializer
  %z = select <16 x i1> %tr, <16 x i8> %x, <16 x i8> %y
  ret <16 x i8> %z
}

; Sorry 16-bit, you're not important enough to support?

; i16 has no byte-granular blendv of its own, so a vpcmpgtw (or vpcomltw on XOP)
; remains before the blend here.
define <8 x i16> @signbit_sel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %mask) {
; AVX12-LABEL: signbit_sel_v8i16:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX12-NEXT:    vpcmpgtw %xmm2, %xmm3, %xmm2
; AVX12-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: signbit_sel_v8i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpgtw %xmm2, %xmm3, %xmm2
; AVX512F-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: signbit_sel_v8i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpgtw %xmm2, %xmm3, %xmm2
; AVX512VL-NEXT:    vpternlogq {{.*#+}} xmm0 = xmm1 ^ (xmm2 & (xmm0 ^ xmm1))
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: signbit_sel_v8i16:
; XOP:       # %bb.0:
; XOP-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; XOP-NEXT:    vpcomltw %xmm3, %xmm2, %xmm2
; XOP-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; XOP-NEXT:    retq
  %tr = icmp slt <8 x i16> %mask, zeroinitializer
  %z = select <8 x i1> %tr, <8 x i16> %x, <8 x i16> %y
  ret <8 x i16> %z
}

define <4 x i32> @signbit_sel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) {
; AVX12-LABEL: signbit_sel_v4i32:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: signbit_sel_v4i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpgtd %zmm2, %zmm3, %k1
; AVX512F-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: signbit_sel_v4i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpgtd %xmm2, %xmm3, %k1
; AVX512VL-NEXT:    vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: signbit_sel_v4i32:
; XOP:       # %bb.0:
; XOP-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; XOP-NEXT:    retq
  %tr = icmp slt <4 x i32> %mask, zeroinitializer
  %z = select <4 x i1> %tr, <4 x i32> %x, <4 x i32> %y
  ret <4 x i32> %z
}

define <2 x i64> @signbit_sel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %mask) {
; AVX12-LABEL: signbit_sel_v2i64:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: signbit_sel_v2i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpgtq %zmm2, %zmm3, %k1
; AVX512F-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: signbit_sel_v2i64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpgtq %xmm2, %xmm3, %k1
; AVX512VL-NEXT:    vpblendmq %xmm0, %xmm1, %xmm0 {%k1}
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: signbit_sel_v2i64:
; XOP:       # %bb.0:
; XOP-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; XOP-NEXT:    retq
  %tr = icmp slt <2 x i64> %mask, zeroinitializer
  %z = select <2 x i1> %tr, <2 x i64> %x, <2 x i64> %y
  ret <2 x i64> %z
}

define <4 x float> @signbit_sel_v4f32(<4 x float> %x, <4 x float> %y, <4 x i32> %mask) {
; AVX12-LABEL: signbit_sel_v4f32:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: signbit_sel_v4f32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpgtd %zmm2, %zmm3, %k1
; AVX512F-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: signbit_sel_v4f32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpgtd %xmm2, %xmm3, %k1
; AVX512VL-NEXT:    vblendmps %xmm0, %xmm1, %xmm0 {%k1}
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: signbit_sel_v4f32:
; XOP:       # %bb.0:
; XOP-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; XOP-NEXT:    retq
  %tr = icmp slt <4 x i32> %mask, zeroinitializer
  %z = select <4 x i1> %tr, <4 x float> %x, <4 x float> %y
  ret <4 x float> %z
}

define <2 x double> @signbit_sel_v2f64(<2 x double> %x, <2 x double> %y, <2 x i64> %mask) {
; AVX12-LABEL: signbit_sel_v2f64:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: signbit_sel_v2f64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpgtq %zmm2, %zmm3, %k1
; AVX512F-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: signbit_sel_v2f64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpgtq %xmm2, %xmm3, %k1
; AVX512VL-NEXT:    vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: signbit_sel_v2f64:
; XOP:       # %bb.0:
; XOP-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; XOP-NEXT:    retq
  %tr = icmp slt <2 x i64> %mask, zeroinitializer
  %z = select <2 x i1> %tr, <2 x double> %x, <2 x double> %y
  ret <2 x double> %z
}

; Test 256-bit vectors to see differences between AVX1 and AVX2.

define <32 x i8> @signbit_sel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %mask) {
; AVX1-LABEL: signbit_sel_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpgtb %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm4, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT:    vandnps %ymm1, %ymm2, %ymm1
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: signbit_sel_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: signbit_sel_v32i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX512-NEXT:    retq
;
; XOP-LABEL: signbit_sel_v32i8:
; XOP:       # %bb.0:
; XOP-NEXT:    vextractf128 $1, %ymm2, %xmm3
; XOP-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; XOP-NEXT:    vpcomltb %xmm4, %xmm3, %xmm3
; XOP-NEXT:    vpcomltb %xmm4, %xmm2, %xmm2
; XOP-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
; XOP-NEXT:    vpcmov %ymm2, %ymm1, %ymm0, %ymm0
; XOP-NEXT:    retq
  %tr = icmp slt <32 x i8> %mask, zeroinitializer
  %z = select <32 x i1> %tr, <32 x i8> %x, <32 x i8> %y
  ret <32 x i8> %z
}

; Sorry 16-bit, you'll never be important enough to support?

define <16 x i16> @signbit_sel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %mask) {
; AVX1-LABEL: signbit_sel_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpgtw %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpcmpgtw %xmm2, %xmm4, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT:    vandnps %ymm1, %ymm2, %ymm1
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: signbit_sel_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX2-NEXT:    vpcmpgtw %ymm2, %ymm3, %ymm2
; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: signbit_sel_v16i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpgtw %ymm2, %ymm3, %ymm2
; AVX512F-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: signbit_sel_v16i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpgtw %ymm2, %ymm3, %ymm2
; AVX512VL-NEXT:    vpternlogq {{.*#+}} ymm0 = ymm1 ^ (ymm2 & (ymm0 ^ ymm1))
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: signbit_sel_v16i16:
; XOP:       # %bb.0:
; XOP-NEXT:    vextractf128 $1, %ymm2, %xmm3
; XOP-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; XOP-NEXT:    vpcomltw %xmm4, %xmm3, %xmm3
; XOP-NEXT:    vpcomltw %xmm4, %xmm2, %xmm2
; XOP-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
; XOP-NEXT:    vpcmov %ymm2, %ymm1, %ymm0, %ymm0
; XOP-NEXT:    retq
  %tr = icmp slt <16 x i16> %mask, zeroinitializer
  %z = select <16 x i1> %tr, <16 x i16> %x, <16 x i16> %y
  ret <16 x i16> %z
}

define <8 x i32> @signbit_sel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %mask) {
; AVX12-LABEL: signbit_sel_v8i32:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vblendvps %ymm2, %ymm0, %ymm1, %ymm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: signbit_sel_v8i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpgtd %zmm2, %zmm3, %k1
; AVX512F-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: signbit_sel_v8i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpgtd %ymm2, %ymm3, %k1
; AVX512VL-NEXT:    vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: signbit_sel_v8i32:
; XOP:       # %bb.0:
; XOP-NEXT:    vblendvps %ymm2, %ymm0, %ymm1, %ymm0
; XOP-NEXT:    retq
  %tr = icmp slt <8 x i32> %mask, zeroinitializer
  %z = select <8 x i1> %tr, <8 x i32> %x, <8 x i32> %y
  ret <8 x i32> %z
}

define <4 x i64> @signbit_sel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %mask) {
; AVX12-LABEL: signbit_sel_v4i64:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: signbit_sel_v4i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpgtq %zmm2, %zmm3, %k1
; AVX512F-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: signbit_sel_v4i64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpgtq %ymm2, %ymm3, %k1
; AVX512VL-NEXT:    vpblendmq %ymm0, %ymm1, %ymm0 {%k1}
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: signbit_sel_v4i64:
; XOP:       # %bb.0:
; XOP-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; XOP-NEXT:    retq
  %tr = icmp slt <4 x i64> %mask, zeroinitializer
  %z = select <4 x i1> %tr, <4 x i64> %x, <4 x i64> %y
  ret <4 x i64> %z
}

define <4 x double> @signbit_sel_v4f64(<4 x double> %x, <4 x double> %y, <4 x i64> %mask) {
; AVX12-LABEL: signbit_sel_v4f64:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: signbit_sel_v4f64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpgtq %zmm2, %zmm3, %k1
; AVX512F-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: signbit_sel_v4f64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpgtq %ymm2, %ymm3, %k1
; AVX512VL-NEXT:    vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: signbit_sel_v4f64:
; XOP:       # %bb.0:
; XOP-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; XOP-NEXT:    retq
  %tr = icmp slt <4 x i64> %mask, zeroinitializer
  %z = select <4 x i1> %tr, <4 x double> %x, <4 x double> %y
  ret <4 x double> %z
}

; Try a condition with a different type than the select operands.

; The i32 mask must be sign-extended to i64 lanes before the f64 blend.
define <4 x double> @signbit_sel_v4f64_small_mask(<4 x double> %x, <4 x double> %y, <4 x i32> %mask) {
; AVX1-LABEL: signbit_sel_v4f64_small_mask:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovsxdq %xmm2, %xmm3
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; AVX1-NEXT:    vpmovsxdq %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: signbit_sel_v4f64_small_mask:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxdq %xmm2, %ymm2
; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: signbit_sel_v4f64_small_mask:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpgtd %zmm2, %zmm3, %k1
; AVX512F-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: signbit_sel_v4f64_small_mask:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpgtd %xmm2, %xmm3, %k1
; AVX512VL-NEXT:    vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: signbit_sel_v4f64_small_mask:
; XOP:       # %bb.0:
; XOP-NEXT:    vpmovsxdq %xmm2, %xmm3
; XOP-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; XOP-NEXT:    vpmovsxdq %xmm2, %xmm2
; XOP-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; XOP-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; XOP-NEXT:    retq
  %tr = icmp slt <4 x i32> %mask, zeroinitializer
  %z = select <4 x i1> %tr, <4 x double> %x, <4 x double> %y
  ret <4 x double> %z
}

; Try a 512-bit vector to make sure AVX-512 is handled as expected.

; Pre-AVX512, the 512-bit select is split into two 256-bit blends.
define <8 x double> @signbit_sel_v8f64(<8 x double> %x, <8 x double> %y, <8 x i64> %mask) {
; AVX12-LABEL: signbit_sel_v8f64:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vblendvpd %ymm4, %ymm0, %ymm2, %ymm0
; AVX12-NEXT:    vblendvpd %ymm5, %ymm1, %ymm3, %ymm1
; AVX12-NEXT:    retq
;
; AVX512-LABEL: signbit_sel_v8f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512-NEXT:    vpcmpgtq %zmm2, %zmm3, %k1
; AVX512-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; AVX512-NEXT:    retq
;
; XOP-LABEL: signbit_sel_v8f64:
; XOP:       # %bb.0:
; XOP-NEXT:    vblendvpd %ymm4, %ymm0, %ymm2, %ymm0
; XOP-NEXT:    vblendvpd %ymm5, %ymm1, %ymm3, %ymm1
; XOP-NEXT:    retq
  %tr = icmp slt <8 x i64> %mask, zeroinitializer
  %z = select <8 x i1> %tr, <8 x double> %x, <8 x double> %y
  ret <8 x double> %z
}

; If we have a floating-point compare:
; (1) Don't die.
; (2) FIXME: If we don't care about signed-zero (and NaN?), the compare should still get folded.

define <4 x float> @signbit_sel_v4f32_fcmp(<4 x float> %x, <4 x float> %y, <4 x float> %mask) #0 {
; AVX12-LABEL: signbit_sel_v4f32_fcmp:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; AVX12-NEXT:    vcmpltps %xmm2, %xmm0, %xmm2
; AVX12-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: signbit_sel_v4f32_fcmp:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; AVX512F-NEXT:    vcmpltps %zmm2, %zmm0, %k1
; AVX512F-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: signbit_sel_v4f32_fcmp:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; AVX512VL-NEXT:    vcmpltps %xmm2, %xmm0, %k1
; AVX512VL-NEXT:    vblendmps %xmm0, %xmm1, %xmm0 {%k1}
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: signbit_sel_v4f32_fcmp:
; XOP:       # %bb.0:
; XOP-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; XOP-NEXT:    vcmpltps %xmm2, %xmm0, %xmm2
; XOP-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; XOP-NEXT:    retq
  %cmp = fcmp olt <4 x float> %x, zeroinitializer
  %sel = select <4 x i1> %cmp, <4 x float> %x, <4 x float> %y
  ret <4 x float> %sel
}

; The tests below build the condition from (and %x, C) == 0 rather than an
; explicit sign-bit compare. When C is a single bit, it can be shifted into
; the sign-bit position (e.g. vpsllq $63 for bit 0) so blendv still applies;
; note the blend arms are swapped versus the signbit tests because the
; predicate is 'eq 0'.

define <4 x i64> @blend_splat1_mask_cond_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %z) {
; AVX1-LABEL: blend_splat1_mask_cond_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsllq $63, %xmm0, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsllq $63, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; AVX1-NEXT:    vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: blend_splat1_mask_cond_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllq $63, %ymm0, %ymm0
; AVX2-NEXT:    vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: blend_splat1_mask_cond_v4i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
; AVX512F-NEXT:    vpblendmq %zmm1, %zmm2, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: blend_splat1_mask_cond_v4i64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
; AVX512VL-NEXT:    vpblendmq %ymm1, %ymm2, %ymm0 {%k1}
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: blend_splat1_mask_cond_v4i64:
; XOP:       # %bb.0:
; XOP-NEXT:    vextractf128 $1, %ymm0, %xmm3
; XOP-NEXT:    vpsllq $63, %xmm3, %xmm3
; XOP-NEXT:    vpmovsxbq {{.*#+}} xmm4 = [18446744073709551553,18446744073709551553]
; XOP-NEXT:    vpshaq %xmm4, %xmm3, %xmm3
; XOP-NEXT:    vpsllq $63, %xmm0, %xmm0
; XOP-NEXT:    vpshaq %xmm4, %xmm0, %xmm0
; XOP-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; XOP-NEXT:    vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
; XOP-NEXT:    retq
  %a = and <4 x i64> %x, <i64 1, i64 1, i64 1, i64 1>
  %c = icmp eq <4 x i64> %a, zeroinitializer
  %r = select <4 x i1> %c, <4 x i64> %y, <4 x i64> %z
  ret <4 x i64> %r
}

define <4 x i32> @blend_splat1_mask_cond_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; AVX12-LABEL: blend_splat1_mask_cond_v4i32:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX12-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: blend_splat1_mask_cond_v4i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
; AVX512F-NEXT:    vpblendmd %zmm1, %zmm2, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: blend_splat1_mask_cond_v4i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
; AVX512VL-NEXT:    vpblendmd %xmm1, %xmm2, %xmm0 {%k1}
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: blend_splat1_mask_cond_v4i32:
; XOP:       # %bb.0:
; XOP-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; XOP-NEXT:    vpcomneqd %xmm3, %xmm0, %xmm0
; XOP-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; XOP-NEXT:    retq
  %a = and <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
  %c = icmp eq <4 x i32> %a, zeroinitializer
  %r = select <4 x i1> %c, <4 x i32> %y, <4 x i32> %z
  ret <4 x i32> %r
}

define <16 x i16> @blend_splat1_mask_cond_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %z) {
; AVX1-LABEL: blend_splat1_mask_cond_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsllw $15, %xmm0, %xmm3
; AVX1-NEXT:    vpsraw $15, %xmm3, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsllw $15, %xmm0, %xmm0
; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; AVX1-NEXT:    vandnps %ymm1, %ymm0, %ymm1
; AVX1-NEXT:    vandps %ymm0, %ymm2, %ymm0
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: blend_splat1_mask_cond_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllw $15, %ymm0, %ymm0
; AVX2-NEXT:    vpsraw $15, %ymm0, %ymm0
; AVX2-NEXT:    vpblendvb %ymm0, %ymm2, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: blend_splat1_mask_cond_v16i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpeqw %ymm3, %ymm0, %ymm0
; AVX512F-NEXT:    vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: blend_splat1_mask_cond_v16i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpeqw %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT:    vpternlogq {{.*#+}} ymm0 = ymm2 ^ (ymm0 & (ymm1 ^ ymm2))
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: blend_splat1_mask_cond_v16i16:
; XOP:       # %bb.0:
; XOP-NEXT:    vpsllw $15, %xmm0, %xmm3
; XOP-NEXT:    vpsraw $15, %xmm3, %xmm3
; XOP-NEXT:    vextractf128 $1, %ymm0, %xmm0
; XOP-NEXT:    vpsllw $15, %xmm0, %xmm0
; XOP-NEXT:    vpsraw $15, %xmm0, %xmm0
; XOP-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; XOP-NEXT:    vpcmov %ymm0, %ymm1, %ymm2, %ymm0
; XOP-NEXT:    retq
  %a = and <16 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %c = icmp eq <16 x i16> %a, zeroinitializer
  %r = select <16 x i1> %c, <16 x i16> %y, <16 x i16> %z
  ret <16 x i16> %r
}

define <16 x i8> @blend_splat1_mask_cond_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %z) {
; AVX12-LABEL: blend_splat1_mask_cond_v16i8:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vpsllw $7, %xmm0, %xmm0
; AVX12-NEXT:    vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: blend_splat1_mask_cond_v16i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpbroadcastb {{.*#+}} xmm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX512F-NEXT:    vpand %xmm3, %xmm0, %xmm0
; AVX512F-NEXT:    vpcmpeqb %xmm3, %xmm0, %xmm0
; AVX512F-NEXT:    vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: blend_splat1_mask_cond_v16i8:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpbroadcastd {{.*#+}} xmm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX512VL-NEXT:    vpand %xmm3, %xmm0, %xmm0
; AVX512VL-NEXT:    vpcmpeqb %xmm3, %xmm0, %xmm0
; AVX512VL-NEXT:    vpternlogq {{.*#+}} xmm0 = xmm1 ^ (xmm0 & (xmm2 ^ xmm1))
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: blend_splat1_mask_cond_v16i8:
; XOP:       # %bb.0:
; XOP-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; XOP-NEXT:    vpcomneqb %xmm3, %xmm0, %xmm0
; XOP-NEXT:    vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
; XOP-NEXT:    retq
  %a = and <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %c = icmp eq <16 x i8> %a, zeroinitializer
  %r = select <16 x i1> %c, <16 x i8> %y, <16 x i8> %z
  ret <16 x i8> %r
}

; The 'splatmax' tests use the sign bit itself as the mask constant, so no
; shift is needed before the blend.

define <2 x i64> @blend_splatmax_mask_cond_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %z) {
; AVX12-LABEL: blend_splatmax_mask_cond_v2i64:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: blend_splatmax_mask_cond_v2i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
; AVX512F-NEXT:    vptestnmq %zmm3, %zmm0, %k1
; AVX512F-NEXT:    vpblendmq %zmm1, %zmm2, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: blend_splatmax_mask_cond_v2i64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
; AVX512VL-NEXT:    vpblendmq %xmm1, %xmm2, %xmm0 {%k1}
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: blend_splatmax_mask_cond_v2i64:
; XOP:       # %bb.0:
; XOP-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; XOP-NEXT:    vpcomneqq %xmm3, %xmm0, %xmm0
; XOP-NEXT:    vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
; XOP-NEXT:    retq
  %a = and <2 x i64> %x, <i64 9223372036854775808, i64 9223372036854775808>
  %c = icmp eq <2 x i64> %a, zeroinitializer
  %r = select <2 x i1> %c, <2 x i64> %y, <2 x i64> %z
  ret <2 x i64> %r
}

define <8 x i32> @blend_splatmax_mask_cond_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
; AVX12-LABEL: blend_splatmax_mask_cond_v8i32:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vblendvps %ymm0, %ymm2, %ymm1, %ymm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: blend_splatmax_mask_cond_v8i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
; AVX512F-NEXT:    vpblendmd %zmm1, %zmm2, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: blend_splatmax_mask_cond_v8i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
; AVX512VL-NEXT:    vpblendmd %ymm1, %ymm2, %ymm0 {%k1}
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: blend_splatmax_mask_cond_v8i32:
; XOP:       # %bb.0:
; XOP-NEXT:    vblendvps %ymm0, %ymm2, %ymm1, %ymm0
; XOP-NEXT:    retq
  %a = and <8 x i32> %x, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
  %c = icmp eq <8 x i32> %a, zeroinitializer
  %r = select <8 x i1> %c, <8 x i32> %y, <8 x i32> %z
  ret <8 x i32> %r
}

define <8 x i16> @blend_splatmax_mask_cond_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %z) {
; AVX12-LABEL: blend_splatmax_mask_cond_v8i16:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX12-NEXT:    vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: blend_splatmax_mask_cond_v8i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpeqw %xmm3, %xmm0, %xmm0
; AVX512F-NEXT:    vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: blend_splatmax_mask_cond_v8i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpeqw %xmm3, %xmm0, %xmm0
; AVX512VL-NEXT:    vpternlogq {{.*#+}} xmm0 = xmm2 ^ (xmm0 & (xmm1 ^ xmm2))
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: blend_splatmax_mask_cond_v8i16:
; XOP:       # %bb.0:
; XOP-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; XOP-NEXT:    vpcomneqw %xmm3, %xmm0, %xmm0
; XOP-NEXT:    vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
; XOP-NEXT:    retq
  %a = and <8 x i16> %x, <i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768>
  %c = icmp eq <8 x i16> %a, zeroinitializer
  %r = select <8 x i1> %c, <8 x i16> %y, <8 x i16> %z
  ret <8 x i16> %r
}

define <32 x i8> @blend_splatmax_mask_cond_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %z) {
; AVX1-LABEL: blend_splatmax_mask_cond_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpgtb %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm4, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    vandnps %ymm1, %ymm0, %ymm1
; AVX1-NEXT:    vandps %ymm0, %ymm2, %ymm0
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: blend_splatmax_mask_cond_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpblendvb %ymm0, %ymm2, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: blend_splatmax_mask_cond_v32i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpbroadcastb {{.*#+}} ymm3 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX512F-NEXT:    vpand %ymm3, %ymm0, %ymm0
; AVX512F-NEXT:    vpcmpeqb %ymm3, %ymm0, %ymm0
; AVX512F-NEXT:    vpblendvb %ymm0, %ymm2, %ymm1, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: blend_splatmax_mask_cond_v32i8:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpbroadcastd {{.*#+}} ymm3 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX512VL-NEXT:    vpand %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT:    vpcmpeqb %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT:    vpternlogq {{.*#+}} ymm0 = ymm1 ^ (ymm0 & (ymm2 ^ ymm1))
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: blend_splatmax_mask_cond_v32i8:
; XOP:       # %bb.0:
; XOP-NEXT:    vextractf128 $1, %ymm0, %xmm3
; XOP-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; XOP-NEXT:    vpcmpgtb %xmm3, %xmm4, %xmm3
; XOP-NEXT:    vpcmpgtb %xmm0, %xmm4, %xmm0
; XOP-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; XOP-NEXT:    vpcmov %ymm0, %ymm1, %ymm2, %ymm0
; XOP-NEXT:    retq
  %a = and <32 x i8> %x, <i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>
  %c = icmp eq <32 x i8> %a, zeroinitializer
  %r = select <32 x i1> %c, <32 x i8> %y, <32 x i8> %z
  ret <32 x i8> %r
}

; Same as the 'splat1' tests but with a non-sign, non-low mask bit.

define <4 x i64> @blend_splat_mask_cond_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %z) {
; AVX1-LABEL: blend_splat_mask_cond_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsllq $62, %xmm0, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsllq $62, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; AVX1-NEXT:    vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: blend_splat_mask_cond_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllq $62, %ymm0, %ymm0
; AVX2-NEXT:    vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: blend_splat_mask_cond_v4i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
; AVX512F-NEXT:    vpblendmq %zmm1, %zmm2, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: blend_splat_mask_cond_v4i64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
; AVX512VL-NEXT:    vpblendmq %ymm1, %ymm2, %ymm0 {%k1}
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: blend_splat_mask_cond_v4i64:
; XOP:       # %bb.0:
; XOP-NEXT:    vextractf128 $1, %ymm0, %xmm3
; XOP-NEXT:    vpsllq $62, %xmm3, %xmm3
; XOP-NEXT:    vpmovsxbq {{.*#+}} xmm4 = [18446744073709551553,18446744073709551553]
; XOP-NEXT:    vpshaq %xmm4, %xmm3, %xmm3
; XOP-NEXT:    vpsllq $62, %xmm0, %xmm0
; XOP-NEXT:    vpshaq %xmm4, %xmm0, %xmm0
; XOP-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; XOP-NEXT:    vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
; XOP-NEXT:    retq
  %a = and <4 x i64> %x, <i64 2, i64 2, i64 2, i64 2>
  %c = icmp eq <4 x i64> %a, zeroinitializer
  %r = select <4 x i1> %c, <4 x i64> %y, <4 x i64> %z
  ret <4 x i64> %r
}

define <4 x i32> @blend_splat_mask_cond_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; AVX12-LABEL: blend_splat_mask_cond_v4i32:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vpslld $15, %xmm0, %xmm0
; AVX12-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: blend_splat_mask_cond_v4i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
; AVX512F-NEXT:    vpblendmd %zmm1, %zmm2, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: blend_splat_mask_cond_v4i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
; AVX512VL-NEXT:    vpblendmd %xmm1, %xmm2, %xmm0 {%k1}
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: blend_splat_mask_cond_v4i32:
; XOP:       # %bb.0:
; XOP-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; XOP-NEXT:    vpcomneqd %xmm3, %xmm0, %xmm0
; XOP-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; XOP-NEXT:    retq
  %a = and <4 x i32> %x, <i32 65536, i32 65536, i32 65536, i32 65536>
  %c = icmp eq <4 x i32> %a, zeroinitializer
  %r = select <4 x i1> %c, <4 x i32> %y, <4 x i32> %z
  ret <4 x i32> %r
}

define <16 x i16> @blend_splat_mask_cond_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %z) {
; AVX1-LABEL: blend_splat_mask_cond_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsllw $5, %xmm0, %xmm3
; AVX1-NEXT:    vpsraw $15, %xmm3, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsllw $5, %xmm0, %xmm0
; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; AVX1-NEXT:    vandnps %ymm1, %ymm0, %ymm1
; AVX1-NEXT:    vandps %ymm0, %ymm2, %ymm0
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: blend_splat_mask_cond_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllw $5, %ymm0, %ymm0
; AVX2-NEXT:    vpsraw $15, %ymm0, %ymm0
; AVX2-NEXT:    vpblendvb %ymm0, %ymm2, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: blend_splat_mask_cond_v16i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpeqw %ymm3, %ymm0, %ymm0
; AVX512F-NEXT:    vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: blend_splat_mask_cond_v16i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpeqw %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT:    vpternlogq {{.*#+}} ymm0 = ymm2 ^ (ymm0 & (ymm1 ^ ymm2))
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: blend_splat_mask_cond_v16i16:
; XOP:       # %bb.0:
; XOP-NEXT:    vpsllw $5, %xmm0, %xmm3
; XOP-NEXT:    vpsraw $15, %xmm3, %xmm3
; XOP-NEXT:    vextractf128 $1, %ymm0, %xmm0
; XOP-NEXT:    vpsllw $5, %xmm0, %xmm0
; XOP-NEXT:    vpsraw $15, %xmm0, %xmm0
; XOP-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; XOP-NEXT:    vpcmov %ymm0, %ymm1, %ymm2, %ymm0
; XOP-NEXT:    retq
  %a = and <16 x i16> %x, <i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024>
  %c = icmp eq <16 x i16> %a, zeroinitializer
  %r = select <16 x i1> %c, <16 x i16> %y, <16 x i16> %z
  ret <16 x i16> %r
}

define <16 x i8> @blend_splat_mask_cond_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %z) {
; AVX12-LABEL: blend_splat_mask_cond_v16i8:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vpsllw $5, %xmm0, %xmm0
; AVX12-NEXT:    vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: blend_splat_mask_cond_v16i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpbroadcastb {{.*#+}} xmm3 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
; AVX512F-NEXT:    vpand %xmm3, %xmm0, %xmm0
; AVX512F-NEXT:    vpcmpeqb %xmm3, %xmm0, %xmm0
; AVX512F-NEXT:    vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: blend_splat_mask_cond_v16i8:
; AVX512VL:       # %bb.0:
;
AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm3 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4] 984; AVX512VL-NEXT: vpand %xmm3, %xmm0, %xmm0 985; AVX512VL-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0 986; AVX512VL-NEXT: vpternlogq {{.*#+}} xmm0 = xmm1 ^ (xmm0 & (xmm2 ^ xmm1)) 987; AVX512VL-NEXT: retq 988; 989; XOP-LABEL: blend_splat_mask_cond_v16i8: 990; XOP: # %bb.0: 991; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 992; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3 993; XOP-NEXT: vpcomneqb %xmm3, %xmm0, %xmm0 994; XOP-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0 995; XOP-NEXT: retq 996 %a = and <16 x i8> %x, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4> 997 %c = icmp eq <16 x i8> %a, zeroinitializer 998 %r = select <16 x i1> %c, <16 x i8> %y, <16 x i8> %z 999 ret <16 x i8> %r 1000} 1001 1002define <2 x i64> @blend_mask_cond_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %z) { 1003; AVX1-LABEL: blend_mask_cond_v2i64: 1004; AVX1: # %bb.0: 1005; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm3 = [1,4] 1006; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0 1007; AVX1-NEXT: vpcmpeqq %xmm3, %xmm0, %xmm0 1008; AVX1-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0 1009; AVX1-NEXT: retq 1010; 1011; AVX2-LABEL: blend_mask_cond_v2i64: 1012; AVX2: # %bb.0: 1013; AVX2-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1014; AVX2-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0 1015; AVX2-NEXT: retq 1016; 1017; AVX512F-LABEL: blend_mask_cond_v2i64: 1018; AVX512F: # %bb.0: 1019; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 1020; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 1021; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1022; AVX512F-NEXT: vpmovsxbq {{.*#+}} xmm3 = [1,4] 1023; AVX512F-NEXT: vptestnmq %zmm3, %zmm0, %k1 1024; AVX512F-NEXT: vpblendmq %zmm1, %zmm2, %zmm0 {%k1} 1025; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 1026; AVX512F-NEXT: vzeroupper 1027; AVX512F-NEXT: retq 1028; 1029; AVX512VL-LABEL: blend_mask_cond_v2i64: 1030; 
AVX512VL: # %bb.0: 1031; AVX512VL-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 1032; AVX512VL-NEXT: vpblendmq %xmm1, %xmm2, %xmm0 {%k1} 1033; AVX512VL-NEXT: retq 1034; 1035; XOP-LABEL: blend_mask_cond_v2i64: 1036; XOP: # %bb.0: 1037; XOP-NEXT: vpshlq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1038; XOP-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0 1039; XOP-NEXT: retq 1040 %a = and <2 x i64> %x, <i64 1, i64 4> 1041 %c = icmp eq <2 x i64> %a, zeroinitializer 1042 %r = select <2 x i1> %c, <2 x i64> %y, <2 x i64> %z 1043 ret <2 x i64> %r 1044} 1045 1046define <4 x i32> @blend_mask_cond_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { 1047; AVX1-LABEL: blend_mask_cond_v4i32: 1048; AVX1: # %bb.0: 1049; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1050; AVX1-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 1051; AVX1-NEXT: retq 1052; 1053; AVX2-LABEL: blend_mask_cond_v4i32: 1054; AVX2: # %bb.0: 1055; AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1056; AVX2-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 1057; AVX2-NEXT: retq 1058; 1059; AVX512F-LABEL: blend_mask_cond_v4i32: 1060; AVX512F: # %bb.0: 1061; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 1062; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 1063; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1064; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [65536,512,2,1] 1065; AVX512F-NEXT: vptestnmd %zmm3, %zmm0, %k1 1066; AVX512F-NEXT: vpblendmd %zmm1, %zmm2, %zmm0 {%k1} 1067; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 1068; AVX512F-NEXT: vzeroupper 1069; AVX512F-NEXT: retq 1070; 1071; AVX512VL-LABEL: blend_mask_cond_v4i32: 1072; AVX512VL: # %bb.0: 1073; AVX512VL-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 1074; AVX512VL-NEXT: vpblendmd %xmm1, %xmm2, %xmm0 {%k1} 1075; AVX512VL-NEXT: retq 1076; 1077; XOP-LABEL: blend_mask_cond_v4i32: 1078; XOP: # %bb.0: 1079; XOP-NEXT: vpshld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1080; XOP-NEXT: 
vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; XOP-NEXT:    retq
  %a = and <4 x i32> %x, <i32 65536, i32 512, i32 2, i32 1>
  %c = icmp eq <4 x i32> %a, zeroinitializer
  %r = select <4 x i1> %c, <4 x i32> %y, <4 x i32> %z
  ret <4 x i32> %r
}

define <8 x i16> @blend_mask_cond_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %z) {
; AVX12-LABEL: blend_mask_cond_v8i16:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX12-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX12-NEXT:    vpcmpeqw %xmm3, %xmm0, %xmm0
; AVX12-NEXT:    vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: blend_mask_cond_v8i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpeqw %xmm3, %xmm0, %xmm0
; AVX512F-NEXT:    vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: blend_mask_cond_v8i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpeqw %xmm3, %xmm0, %xmm0
; AVX512VL-NEXT:    vpternlogq {{.*#+}} xmm0 = xmm2 ^ (xmm0 & (xmm1 ^ xmm2))
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: blend_mask_cond_v8i16:
; XOP:       # %bb.0:
; XOP-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; XOP-NEXT:    vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT:    vpcomltw %xmm3, %xmm0, %xmm0
; XOP-NEXT:    vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
; XOP-NEXT:    retq
  %a = and <8 x i16> %x, <i16 1, i16 2, i16 8, i16 4, i16 8, i16 1024, i16 2, i16 4096>
  %c = icmp eq <8 x i16> %a, zeroinitializer
  %r = select <8 x i1> %c, <8 x i16> %y, <8 x i16> %z
  ret <8 x i16> %r
}

define <16 x i8> @blend_mask_cond_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %z) {
; AVX12-LABEL: blend_mask_cond_v16i8:
; AVX12:       # %bb.0:
1129; AVX12-NEXT: vmovdqa {{.*#+}} xmm3 = [1,2,4,8,16,32,64,128,4,4,4,4,2,2,2,2] 1130; AVX12-NEXT: vpand %xmm3, %xmm0, %xmm0 1131; AVX12-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0 1132; AVX12-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0 1133; AVX12-NEXT: retq 1134; 1135; AVX512F-LABEL: blend_mask_cond_v16i8: 1136; AVX512F: # %bb.0: 1137; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [1,2,4,8,16,32,64,128,4,4,4,4,2,2,2,2] 1138; AVX512F-NEXT: vpand %xmm3, %xmm0, %xmm0 1139; AVX512F-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0 1140; AVX512F-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0 1141; AVX512F-NEXT: retq 1142; 1143; AVX512VL-LABEL: blend_mask_cond_v16i8: 1144; AVX512VL: # %bb.0: 1145; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [1,2,4,8,16,32,64,128,4,4,4,4,2,2,2,2] 1146; AVX512VL-NEXT: vpand %xmm3, %xmm0, %xmm0 1147; AVX512VL-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0 1148; AVX512VL-NEXT: vpternlogq {{.*#+}} xmm0 = xmm1 ^ (xmm0 & (xmm2 ^ xmm1)) 1149; AVX512VL-NEXT: retq 1150; 1151; XOP-LABEL: blend_mask_cond_v16i8: 1152; XOP: # %bb.0: 1153; XOP-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1154; XOP-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0 1155; XOP-NEXT: retq 1156 %a = and <16 x i8> %x, <i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 4, i8 4, i8 4, i8 4, i8 2, i8 2, i8 2, i8 2> 1157 %c = icmp eq <16 x i8> %a, zeroinitializer 1158 %r = select <16 x i1> %c, <16 x i8> %y, <16 x i8> %z 1159 ret <16 x i8> %r 1160} 1161 1162define <4 x i64> @blend_mask_cond_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %z) { 1163; AVX1-LABEL: blend_mask_cond_v4i64: 1164; AVX1: # %bb.0: 1165; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1166; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1167; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 1168; AVX1-NEXT: vpcmpeqq %xmm4, %xmm3, %xmm3 1169; AVX1-NEXT: vpcmpeqq %xmm4, %xmm0, %xmm0 1170; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 1171; AVX1-NEXT: vblendvpd %ymm0, %ymm1, %ymm2, %ymm0 1172; AVX1-NEXT: retq 1173; 1174; AVX2-LABEL: blend_mask_cond_v4i64: 
1175; AVX2: # %bb.0: 1176; AVX2-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1177; AVX2-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0 1178; AVX2-NEXT: retq 1179; 1180; AVX512F-LABEL: blend_mask_cond_v4i64: 1181; AVX512F: # %bb.0: 1182; AVX512F-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2 1183; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 1184; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1185; AVX512F-NEXT: vpmovzxwq {{.*#+}} ymm3 = [2,4,32768,1] 1186; AVX512F-NEXT: vptestnmq %zmm3, %zmm0, %k1 1187; AVX512F-NEXT: vpblendmq %zmm1, %zmm2, %zmm0 {%k1} 1188; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1189; AVX512F-NEXT: retq 1190; 1191; AVX512VL-LABEL: blend_mask_cond_v4i64: 1192; AVX512VL: # %bb.0: 1193; AVX512VL-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k1 1194; AVX512VL-NEXT: vpblendmq %ymm1, %ymm2, %ymm0 {%k1} 1195; AVX512VL-NEXT: retq 1196; 1197; XOP-LABEL: blend_mask_cond_v4i64: 1198; XOP: # %bb.0: 1199; XOP-NEXT: vpshlq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3 1200; XOP-NEXT: vextractf128 $1, %ymm0, %xmm0 1201; XOP-NEXT: vpshlq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1202; XOP-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0 1203; XOP-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0 1204; XOP-NEXT: retq 1205 %a = and <4 x i64> %x, <i64 2, i64 4, i64 32768, i64 1> 1206 %c = icmp eq <4 x i64> %a, zeroinitializer 1207 %r = select <4 x i1> %c, <4 x i64> %y, <4 x i64> %z 1208 ret <4 x i64> %r 1209} 1210 1211define <8 x i32> @blend_mask_cond_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %z) { 1212; AVX1-LABEL: blend_mask_cond_v8i32: 1213; AVX1: # %bb.0: 1214; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3 1215; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1216; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1217; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0 1218; AVX1-NEXT: vblendvps %ymm0, %ymm2, %ymm1, %ymm0 1219; AVX1-NEXT: retq 1220; 1221; AVX2-LABEL: blend_mask_cond_v8i32: 
1222; AVX2: # %bb.0: 1223; AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1224; AVX2-NEXT: vblendvps %ymm0, %ymm2, %ymm1, %ymm0 1225; AVX2-NEXT: retq 1226; 1227; AVX512F-LABEL: blend_mask_cond_v8i32: 1228; AVX512F: # %bb.0: 1229; AVX512F-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2 1230; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 1231; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1232; AVX512F-NEXT: vpmovsxwd {{.*#+}} ymm3 = [1,2,8,4,8,1024,2,4096] 1233; AVX512F-NEXT: vptestnmd %zmm3, %zmm0, %k1 1234; AVX512F-NEXT: vpblendmd %zmm1, %zmm2, %zmm0 {%k1} 1235; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1236; AVX512F-NEXT: retq 1237; 1238; AVX512VL-LABEL: blend_mask_cond_v8i32: 1239; AVX512VL: # %bb.0: 1240; AVX512VL-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k1 1241; AVX512VL-NEXT: vpblendmd %ymm1, %ymm2, %ymm0 {%k1} 1242; AVX512VL-NEXT: retq 1243; 1244; XOP-LABEL: blend_mask_cond_v8i32: 1245; XOP: # %bb.0: 1246; XOP-NEXT: vpshld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3 1247; XOP-NEXT: vextractf128 $1, %ymm0, %xmm0 1248; XOP-NEXT: vpshld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1249; XOP-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0 1250; XOP-NEXT: vblendvps %ymm0, %ymm2, %ymm1, %ymm0 1251; XOP-NEXT: retq 1252 %a = and <8 x i32> %x, <i32 1, i32 2, i32 8, i32 4, i32 8, i32 1024, i32 2, i32 4096> 1253 %c = icmp eq <8 x i32> %a, zeroinitializer 1254 %r = select <8 x i1> %c, <8 x i32> %y, <8 x i32> %z 1255 ret <8 x i32> %r 1256} 1257 1258define <16 x i16> @blend_mask_cond_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %z) { 1259; AVX1-LABEL: blend_mask_cond_v16i16: 1260; AVX1: # %bb.0: 1261; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1262; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1263; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 1264; AVX1-NEXT: vpcmpeqw %xmm4, %xmm3, %xmm3 1265; AVX1-NEXT: vpcmpeqw %xmm4, %xmm0, %xmm0 1266; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 1267; AVX1-NEXT: 
vandnps %ymm2, %ymm0, %ymm2 1268; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm0 1269; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0 1270; AVX1-NEXT: retq 1271; 1272; AVX2-LABEL: blend_mask_cond_v16i16: 1273; AVX2: # %bb.0: 1274; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1275; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 1276; AVX2-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0 1277; AVX2-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0 1278; AVX2-NEXT: retq 1279; 1280; AVX512F-LABEL: blend_mask_cond_v16i16: 1281; AVX512F: # %bb.0: 1282; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1283; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 1284; AVX512F-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0 1285; AVX512F-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0 1286; AVX512F-NEXT: retq 1287; 1288; AVX512VL-LABEL: blend_mask_cond_v16i16: 1289; AVX512VL: # %bb.0: 1290; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1291; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 1292; AVX512VL-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0 1293; AVX512VL-NEXT: vpternlogq {{.*#+}} ymm0 = ymm2 ^ (ymm0 & (ymm1 ^ ymm2)) 1294; AVX512VL-NEXT: retq 1295; 1296; XOP-LABEL: blend_mask_cond_v16i16: 1297; XOP: # %bb.0: 1298; XOP-NEXT: vextractf128 $1, %ymm0, %xmm3 1299; XOP-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 1300; XOP-NEXT: vpxor %xmm4, %xmm4, %xmm4 1301; XOP-NEXT: vpcomltw %xmm4, %xmm3, %xmm3 1302; XOP-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1303; XOP-NEXT: vpcomltw %xmm4, %xmm0, %xmm0 1304; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 1305; XOP-NEXT: vpcmov %ymm0, %ymm1, %ymm2, %ymm0 1306; XOP-NEXT: retq 1307 %a = and <16 x i16> %x, <i16 1, i16 2, i16 8, i16 4, i16 8, i16 2, i16 2, i16 2, i16 2, i16 8, i16 8, i16 64, i16 64, i16 1024, i16 4096, i16 1024> 1308 %c = icmp eq <16 x i16> %a, zeroinitializer 1309 %r = select <16 x i1> %c, <16 x i16> %y, <16 x i16> %z 1310 ret <16 x i16> %r 1311} 1312 1313define <32 x i8> @blend_mask_cond_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %z) { 
1314; AVX1-LABEL: blend_mask_cond_v32i8: 1315; AVX1: # %bb.0: 1316; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1317; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1318; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 1319; AVX1-NEXT: vpcmpeqb %xmm4, %xmm3, %xmm3 1320; AVX1-NEXT: vpcmpeqb %xmm4, %xmm0, %xmm0 1321; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 1322; AVX1-NEXT: vandnps %ymm2, %ymm0, %ymm2 1323; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm0 1324; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0 1325; AVX1-NEXT: retq 1326; 1327; AVX2-LABEL: blend_mask_cond_v32i8: 1328; AVX2: # %bb.0: 1329; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128,4,4,4,4,2,2,2,2,1,2,4,8,16,32,64,128,4,4,4,4,128,4,2,16] 1330; AVX2-NEXT: vpand %ymm3, %ymm0, %ymm0 1331; AVX2-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0 1332; AVX2-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm0 1333; AVX2-NEXT: retq 1334; 1335; AVX512F-LABEL: blend_mask_cond_v32i8: 1336; AVX512F: # %bb.0: 1337; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128,4,4,4,4,2,2,2,2,1,2,4,8,16,32,64,128,4,4,4,4,128,4,2,16] 1338; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0 1339; AVX512F-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0 1340; AVX512F-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm0 1341; AVX512F-NEXT: retq 1342; 1343; AVX512VL-LABEL: blend_mask_cond_v32i8: 1344; AVX512VL: # %bb.0: 1345; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128,4,4,4,4,2,2,2,2,1,2,4,8,16,32,64,128,4,4,4,4,128,4,2,16] 1346; AVX512VL-NEXT: vpand %ymm3, %ymm0, %ymm0 1347; AVX512VL-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0 1348; AVX512VL-NEXT: vpternlogq {{.*#+}} ymm0 = ymm1 ^ (ymm0 & (ymm2 ^ ymm1)) 1349; AVX512VL-NEXT: retq 1350; 1351; XOP-LABEL: blend_mask_cond_v32i8: 1352; XOP: # %bb.0: 1353; XOP-NEXT: vextractf128 $1, %ymm0, %xmm3 1354; XOP-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 1355; XOP-NEXT: vpxor %xmm4, %xmm4, %xmm4 1356; XOP-NEXT: vpcomltb %xmm4, %xmm3, %xmm3 1357; XOP-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1358; XOP-NEXT: 
vpcomltb %xmm4, %xmm0, %xmm0 1359; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 1360; XOP-NEXT: vpcmov %ymm0, %ymm1, %ymm2, %ymm0 1361; XOP-NEXT: retq 1362 %a = and <32 x i8> %x, <i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 4, i8 4, i8 4, i8 4, i8 2, i8 2, i8 2, i8 2, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 4, i8 4, i8 4, i8 4, i8 128, i8 4, i8 2, i8 16> 1363 %c = icmp eq <32 x i8> %a, zeroinitializer 1364 %r = select <32 x i1> %c, <32 x i8> %y, <32 x i8> %z 1365 ret <32 x i8> %r 1366} 1367 1368define void @store_blend_load_v4i64(ptr %a0, ptr %a1, ptr %a2) { 1369; AVX1-LABEL: store_blend_load_v4i64: 1370; AVX1: # %bb.0: 1371; AVX1-NEXT: vmovapd (%rsi), %ymm0 1372; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] 1373; AVX1-NEXT: # xmm1 = mem[0,0] 1374; AVX1-NEXT: vpxor 16(%rdi), %xmm1, %xmm2 1375; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [9223372036854775815,9223372036854775815] 1376; AVX1-NEXT: # xmm3 = mem[0,0] 1377; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2 1378; AVX1-NEXT: vpxor (%rdi), %xmm1, %xmm1 1379; AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm1 1380; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 1381; AVX1-NEXT: vblendvpd %ymm1, (%rdi), %ymm0, %ymm0 1382; AVX1-NEXT: vmovapd %ymm0, (%rdx) 1383; AVX1-NEXT: vzeroupper 1384; AVX1-NEXT: retq 1385; 1386; AVX2-LABEL: store_blend_load_v4i64: 1387; AVX2: # %bb.0: 1388; AVX2-NEXT: vmovdqa (%rdi), %ymm0 1389; AVX2-NEXT: vmovapd (%rsi), %ymm1 1390; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] 1391; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm2 1392; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9223372036854775815,9223372036854775815,9223372036854775815,9223372036854775815] 1393; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2 1394; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 1395; AVX2-NEXT: vmovapd %ymm0, (%rdx) 1396; AVX2-NEXT: vzeroupper 1397; AVX2-NEXT: retq 1398; 1399; AVX512F-LABEL: store_blend_load_v4i64: 
1400; AVX512F: # %bb.0: 1401; AVX512F-NEXT: vmovdqa (%rdi), %ymm0 1402; AVX512F-NEXT: vmovdqa (%rsi), %ymm1 1403; AVX512F-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1 1404; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} 1405; AVX512F-NEXT: vmovdqa %ymm1, (%rdx) 1406; AVX512F-NEXT: vzeroupper 1407; AVX512F-NEXT: retq 1408; 1409; AVX512VL-LABEL: store_blend_load_v4i64: 1410; AVX512VL: # %bb.0: 1411; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0 1412; AVX512VL-NEXT: vmovdqa (%rsi), %ymm1 1413; AVX512VL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1 1414; AVX512VL-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1} 1415; AVX512VL-NEXT: vmovdqa %ymm1, (%rdx) 1416; AVX512VL-NEXT: vzeroupper 1417; AVX512VL-NEXT: retq 1418; 1419; XOP-LABEL: store_blend_load_v4i64: 1420; XOP: # %bb.0: 1421; XOP-NEXT: vmovapd (%rsi), %ymm0 1422; XOP-NEXT: vpmovsxbq {{.*#+}} xmm1 = [7,7] 1423; XOP-NEXT: vpcomltuq 16(%rdi), %xmm1, %xmm2 1424; XOP-NEXT: vpcomltuq (%rdi), %xmm1, %xmm1 1425; XOP-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 1426; XOP-NEXT: vblendvpd %ymm1, (%rdi), %ymm0, %ymm0 1427; XOP-NEXT: vmovapd %ymm0, (%rdx) 1428; XOP-NEXT: vzeroupper 1429; XOP-NEXT: retq 1430 %v0 = load <4 x i64>, ptr %a0 1431 %v1 = load <4 x i64>, ptr %a1 1432 %cmp = icmp ugt <4 x i64> %v0, <i64 7, i64 7, i64 7, i64 7> 1433 %res = select <4 x i1> %cmp, <4 x i64> %v0, <4 x i64> %v1 1434 store <4 x i64> %res, ptr %a2 1435 ret void 1436} 1437 1438define void @store_blend_load_v8i32(ptr %a0, ptr %a1, ptr %a2) { 1439; AVX1-LABEL: store_blend_load_v8i32: 1440; AVX1: # %bb.0: 1441; AVX1-NEXT: vmovaps (%rsi), %ymm0 1442; AVX1-NEXT: vmovdqa (%rdi), %xmm1 1443; AVX1-NEXT: vmovdqa 16(%rdi), %xmm2 1444; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [8,8,8,8] 1445; AVX1-NEXT: vpmaxud %xmm3, %xmm2, %xmm4 1446; AVX1-NEXT: vpcmpeqd %xmm4, %xmm2, %xmm2 1447; AVX1-NEXT: vpmaxud %xmm3, %xmm1, %xmm3 1448; AVX1-NEXT: vpcmpeqd %xmm3, %xmm1, %xmm1 1449; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 1450; AVX1-NEXT: vblendvps 
%ymm1, (%rdi), %ymm0, %ymm0 1451; AVX1-NEXT: vmovaps %ymm0, (%rdx) 1452; AVX1-NEXT: vzeroupper 1453; AVX1-NEXT: retq 1454; 1455; AVX2-LABEL: store_blend_load_v8i32: 1456; AVX2: # %bb.0: 1457; AVX2-NEXT: vmovdqa (%rdi), %ymm0 1458; AVX2-NEXT: vmovaps (%rsi), %ymm1 1459; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [8,8,8,8,8,8,8,8] 1460; AVX2-NEXT: vpmaxud %ymm2, %ymm0, %ymm2 1461; AVX2-NEXT: vpcmpeqd %ymm2, %ymm0, %ymm2 1462; AVX2-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 1463; AVX2-NEXT: vmovaps %ymm0, (%rdx) 1464; AVX2-NEXT: vzeroupper 1465; AVX2-NEXT: retq 1466; 1467; AVX512F-LABEL: store_blend_load_v8i32: 1468; AVX512F: # %bb.0: 1469; AVX512F-NEXT: vmovdqa (%rdi), %ymm0 1470; AVX512F-NEXT: vmovdqa (%rsi), %ymm1 1471; AVX512F-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1 1472; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} 1473; AVX512F-NEXT: vmovdqa %ymm1, (%rdx) 1474; AVX512F-NEXT: vzeroupper 1475; AVX512F-NEXT: retq 1476; 1477; AVX512VL-LABEL: store_blend_load_v8i32: 1478; AVX512VL: # %bb.0: 1479; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0 1480; AVX512VL-NEXT: vmovdqa (%rsi), %ymm1 1481; AVX512VL-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 1482; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} 1483; AVX512VL-NEXT: vmovdqa %ymm1, (%rdx) 1484; AVX512VL-NEXT: vzeroupper 1485; AVX512VL-NEXT: retq 1486; 1487; XOP-LABEL: store_blend_load_v8i32: 1488; XOP: # %bb.0: 1489; XOP-NEXT: vmovaps (%rsi), %ymm0 1490; XOP-NEXT: vbroadcastss {{.*#+}} xmm1 = [7,7,7,7] 1491; XOP-NEXT: vpcomltud 16(%rdi), %xmm1, %xmm2 1492; XOP-NEXT: vpcomltud (%rdi), %xmm1, %xmm1 1493; XOP-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 1494; XOP-NEXT: vblendvps %ymm1, (%rdi), %ymm0, %ymm0 1495; XOP-NEXT: vmovaps %ymm0, (%rdx) 1496; XOP-NEXT: vzeroupper 1497; XOP-NEXT: retq 1498 %v0 = load <8 x i32>, ptr %a0 1499 %v1 = load <8 x i32>, ptr %a1 1500 %cmp = icmp ugt <8 x i32> %v0, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> 1501 %res = select <8 x i1> %cmp, <8 x i32> 
%v0, <8 x i32> %v1 1502 store <8 x i32> %res, ptr %a2 1503 ret void 1504} 1505 1506define void @store_blend_load_v16i16(ptr %a0, ptr %a1, ptr %a2) { 1507; AVX1-LABEL: store_blend_load_v16i16: 1508; AVX1: # %bb.0: 1509; AVX1-NEXT: vmovdqa (%rdi), %xmm0 1510; AVX1-NEXT: vmovdqa 16(%rdi), %xmm1 1511; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [8,8,8,8,8,8,8,8] 1512; AVX1-NEXT: vpmaxuw %xmm2, %xmm0, %xmm3 1513; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm3 1514; AVX1-NEXT: vpmaxuw %xmm2, %xmm1, %xmm2 1515; AVX1-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm2 1516; AVX1-NEXT: vmovdqa (%rsi), %xmm4 1517; AVX1-NEXT: vmovdqa 16(%rsi), %xmm5 1518; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm5, %xmm1 1519; AVX1-NEXT: vpblendvb %xmm3, %xmm0, %xmm4, %xmm0 1520; AVX1-NEXT: vmovdqa %xmm0, (%rdx) 1521; AVX1-NEXT: vmovdqa %xmm1, 16(%rdx) 1522; AVX1-NEXT: retq 1523; 1524; AVX2-LABEL: store_blend_load_v16i16: 1525; AVX2: # %bb.0: 1526; AVX2-NEXT: vmovdqa (%rdi), %ymm0 1527; AVX2-NEXT: vmovdqa (%rsi), %ymm1 1528; AVX2-NEXT: vpmaxuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2 1529; AVX2-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm2 1530; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 1531; AVX2-NEXT: vmovdqa %ymm0, (%rdx) 1532; AVX2-NEXT: vzeroupper 1533; AVX2-NEXT: retq 1534; 1535; AVX512F-LABEL: store_blend_load_v16i16: 1536; AVX512F: # %bb.0: 1537; AVX512F-NEXT: vmovdqa (%rdi), %ymm0 1538; AVX512F-NEXT: vmovdqa (%rsi), %ymm1 1539; AVX512F-NEXT: vpmaxuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2 1540; AVX512F-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm2 1541; AVX512F-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 1542; AVX512F-NEXT: vmovdqa %ymm0, (%rdx) 1543; AVX512F-NEXT: vzeroupper 1544; AVX512F-NEXT: retq 1545; 1546; AVX512VL-LABEL: store_blend_load_v16i16: 1547; AVX512VL: # %bb.0: 1548; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0 1549; AVX512VL-NEXT: vpmaxuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1 1550; AVX512VL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm1 1551; AVX512VL-NEXT: vpternlogq {{.*#+}} ymm1 = mem ^ (ymm1 & (ymm0 ^ mem)) 1552; 
AVX512VL-NEXT: vmovdqa %ymm1, (%rdx) 1553; AVX512VL-NEXT: vzeroupper 1554; AVX512VL-NEXT: retq 1555; 1556; XOP-LABEL: store_blend_load_v16i16: 1557; XOP: # %bb.0: 1558; XOP-NEXT: vmovdqa (%rdi), %ymm0 1559; XOP-NEXT: vbroadcastss {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7] 1560; XOP-NEXT: vpcomltuw 16(%rdi), %xmm1, %xmm2 1561; XOP-NEXT: vpcomltuw (%rdi), %xmm1, %xmm1 1562; XOP-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 1563; XOP-NEXT: vpcmov %ymm1, (%rsi), %ymm0, %ymm0 1564; XOP-NEXT: vmovdqa %ymm0, (%rdx) 1565; XOP-NEXT: vzeroupper 1566; XOP-NEXT: retq 1567 %v0 = load <16 x i16>, ptr %a0 1568 %v1 = load <16 x i16>, ptr %a1 1569 %cmp = icmp ugt <16 x i16> %v0, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7> 1570 %res = select <16 x i1> %cmp, <16 x i16> %v0, <16 x i16> %v1 1571 store <16 x i16> %res, ptr %a2 1572 ret void 1573} 1574 1575define void @store_blend_load_v32i8(ptr %a0, ptr %a1, ptr %a2) { 1576; AVX1-LABEL: store_blend_load_v32i8: 1577; AVX1: # %bb.0: 1578; AVX1-NEXT: vmovdqa (%rdi), %xmm0 1579; AVX1-NEXT: vmovdqa 16(%rdi), %xmm1 1580; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] 1581; AVX1-NEXT: vpmaxub %xmm2, %xmm0, %xmm3 1582; AVX1-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm3 1583; AVX1-NEXT: vpmaxub %xmm2, %xmm1, %xmm2 1584; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm2 1585; AVX1-NEXT: vmovdqa (%rsi), %xmm4 1586; AVX1-NEXT: vmovdqa 16(%rsi), %xmm5 1587; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm5, %xmm1 1588; AVX1-NEXT: vpblendvb %xmm3, %xmm0, %xmm4, %xmm0 1589; AVX1-NEXT: vmovdqa %xmm0, (%rdx) 1590; AVX1-NEXT: vmovdqa %xmm1, 16(%rdx) 1591; AVX1-NEXT: retq 1592; 1593; AVX2-LABEL: store_blend_load_v32i8: 1594; AVX2: # %bb.0: 1595; AVX2-NEXT: vmovdqa (%rdi), %ymm0 1596; AVX2-NEXT: vmovdqa (%rsi), %ymm1 1597; AVX2-NEXT: vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2 1598; AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2 1599; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 1600; AVX2-NEXT: 
vmovdqa %ymm0, (%rdx) 1601; AVX2-NEXT: vzeroupper 1602; AVX2-NEXT: retq 1603; 1604; AVX512F-LABEL: store_blend_load_v32i8: 1605; AVX512F: # %bb.0: 1606; AVX512F-NEXT: vmovdqa (%rdi), %ymm0 1607; AVX512F-NEXT: vmovdqa (%rsi), %ymm1 1608; AVX512F-NEXT: vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2 1609; AVX512F-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2 1610; AVX512F-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 1611; AVX512F-NEXT: vmovdqa %ymm0, (%rdx) 1612; AVX512F-NEXT: vzeroupper 1613; AVX512F-NEXT: retq 1614; 1615; AVX512VL-LABEL: store_blend_load_v32i8: 1616; AVX512VL: # %bb.0: 1617; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0 1618; AVX512VL-NEXT: vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1 1619; AVX512VL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm1 1620; AVX512VL-NEXT: vpternlogq {{.*#+}} ymm1 = mem ^ (ymm1 & (ymm0 ^ mem)) 1621; AVX512VL-NEXT: vmovdqa %ymm1, (%rdx) 1622; AVX512VL-NEXT: vzeroupper 1623; AVX512VL-NEXT: retq 1624; 1625; XOP-LABEL: store_blend_load_v32i8: 1626; XOP: # %bb.0: 1627; XOP-NEXT: vmovdqa (%rdi), %ymm0 1628; XOP-NEXT: vbroadcastss {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] 1629; XOP-NEXT: vpcomltub 16(%rdi), %xmm1, %xmm2 1630; XOP-NEXT: vpcomltub (%rdi), %xmm1, %xmm1 1631; XOP-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 1632; XOP-NEXT: vpcmov %ymm1, (%rsi), %ymm0, %ymm0 1633; XOP-NEXT: vmovdqa %ymm0, (%rdx) 1634; XOP-NEXT: vzeroupper 1635; XOP-NEXT: retq 1636 %v0 = load <32 x i8>, ptr %a0 1637 %v1 = load <32 x i8>, ptr %a1 1638 %cmp = icmp ugt <32 x i8> %v0, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7> 1639 %res = select <32 x i1> %cmp, <32 x i8> %v0, <32 x i8> %v1 1640 store <32 x i8> %res, ptr %a2 1641 ret void 1642} 1643 1644define void @PR46531(ptr %x, ptr %y, ptr %z) { 1645; AVX12-LABEL: PR46531: 1646; AVX12: # %bb.0: 1647; AVX12-NEXT: vmovdqu (%rsi), %xmm0 1648; AVX12-NEXT: vmovdqu (%rdx), 
%xmm1 1649; AVX12-NEXT: vpor %xmm0, %xmm1, %xmm2 1650; AVX12-NEXT: vpxor %xmm0, %xmm1, %xmm0 1651; AVX12-NEXT: vpslld $31, %xmm1, %xmm1 1652; AVX12-NEXT: vblendvps %xmm1, %xmm0, %xmm2, %xmm0 1653; AVX12-NEXT: vmovups %xmm0, (%rdi) 1654; AVX12-NEXT: retq 1655; 1656; AVX512F-LABEL: PR46531: 1657; AVX512F: # %bb.0: 1658; AVX512F-NEXT: vmovdqu (%rsi), %xmm0 1659; AVX512F-NEXT: vmovdqu (%rdx), %xmm1 1660; AVX512F-NEXT: vpor %xmm0, %xmm1, %xmm2 1661; AVX512F-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %k1 1662; AVX512F-NEXT: vpxor %xmm0, %xmm1, %xmm0 1663; AVX512F-NEXT: vmovdqa32 %zmm2, %zmm0 {%k1} 1664; AVX512F-NEXT: vmovdqu %xmm0, (%rdi) 1665; AVX512F-NEXT: vzeroupper 1666; AVX512F-NEXT: retq 1667; 1668; AVX512VL-LABEL: PR46531: 1669; AVX512VL: # %bb.0: 1670; AVX512VL-NEXT: vmovdqu (%rsi), %xmm0 1671; AVX512VL-NEXT: vmovdqu (%rdx), %xmm1 1672; AVX512VL-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %k1 1673; AVX512VL-NEXT: vpxor %xmm0, %xmm1, %xmm2 1674; AVX512VL-NEXT: vpord %xmm0, %xmm1, %xmm2 {%k1} 1675; AVX512VL-NEXT: vmovdqu %xmm2, (%rdi) 1676; AVX512VL-NEXT: retq 1677; 1678; XOP-LABEL: PR46531: 1679; XOP: # %bb.0: 1680; XOP-NEXT: vmovdqu (%rsi), %xmm0 1681; XOP-NEXT: vmovdqu (%rdx), %xmm1 1682; XOP-NEXT: vpor %xmm0, %xmm1, %xmm2 1683; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm3 1684; XOP-NEXT: vpxor %xmm4, %xmm4, %xmm4 1685; XOP-NEXT: vpcomneqd %xmm4, %xmm3, %xmm3 1686; XOP-NEXT: vpxor %xmm0, %xmm1, %xmm0 1687; XOP-NEXT: vblendvps %xmm3, %xmm0, %xmm2, %xmm0 1688; XOP-NEXT: vmovups %xmm0, (%rdi) 1689; XOP-NEXT: retq 1690 %a = load <4 x i32>, ptr %y, align 4 1691 %b = load <4 x i32>, ptr %z, align 4 1692 %or = or <4 x i32> %b, %a 1693 %and = and <4 x i32> %b, <i32 1, i32 1, i32 1, i32 1> 1694 %cmp = icmp eq <4 x i32> %and, zeroinitializer 1695 %xor = xor <4 x i32> %b, %a 1696 %sel = select <4 x i1> %cmp, <4 x i32> %or, <4 x i32> %xor 1697 store <4 x i32> %sel, ptr %x, align 4 1698 ret void 1699} 1700 1701define <64 x 
i8> @PR110875(<32 x i8> %a0, <32 x i8> %a1, i64 %a2) {
; AVX1-LABEL: PR110875:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovq %rdi, %xmm2
; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm3 = xmm2[0,1,2,3,4,4,5,5]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm4 = xmm2[0,1,2,3,6,6,7,7]
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT:    vshufps {{.*#+}} ymm3 = ymm3[2,2,3,3,6,6,7,7]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm4 = xmm2[0,0,1,1,4,5,6,7]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm2[2,2,3,3,4,5,6,7]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm4, %ymm2
; AVX1-NEXT:    vshufps {{.*#+}} ymm2 = ymm2[0,0,1,1,4,4,5,5]
; AVX1-NEXT:    vbroadcastsd {{.*#+}} ymm4 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX1-NEXT:    vandps %ymm4, %ymm2, %ymm2
; AVX1-NEXT:    vandps %ymm4, %ymm3, %ymm3
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT:    vpxor %xmm5, %xmm5, %xmm5
; AVX1-NEXT:    vpcmpeqb %xmm5, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpeqb %xmm5, %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
; AVX1-NEXT:    vpcmpeqb %xmm5, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpeqb %xmm5, %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm2, %ymm2
; AVX1-NEXT:    vbroadcastss {{.*#+}} ymm4 = [20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20]
; AVX1-NEXT:    vandnps %ymm4, %ymm2, %ymm5
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vorps %ymm5, %ymm0, %ymm0
; AVX1-NEXT:    vandnps %ymm4, %ymm3, %ymm2
; AVX1-NEXT:    vandps %ymm3, %ymm1, %ymm1
; AVX1-NEXT:    vorps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: PR110875:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovq %rdi, %xmm2
; AVX2-NEXT:    vpbroadcastq %xmm2, %ymm2
; AVX2-NEXT:    vpshufb {{.*#+}} ymm3 = ymm2[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
; AVX2-NEXT:    vpshufb {{.*#+}} ymm2 = ymm2[4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,22,22,22,22,22,22,22,22,23,23,23,23,23,23,23,23]
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm4 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %ymm4, %ymm2, %ymm2
; AVX2-NEXT:    vpand %ymm4, %ymm3, %ymm3
; AVX2-NEXT:    vpcmpeqb %ymm4, %ymm3, %ymm3
; AVX2-NEXT:    vpbroadcastb {{.*#+}} ymm5 = [20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20]
; AVX2-NEXT:    vpblendvb %ymm3, %ymm5, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqb %ymm4, %ymm2, %ymm2
; AVX2-NEXT:    vpblendvb %ymm2, %ymm5, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: PR110875:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm1
; AVX512F-NEXT:    vmovq %rdi, %xmm0
; AVX512F-NEXT:    vpbroadcastq %xmm0, %ymm0
; AVX512F-NEXT:    vpshufb {{.*#+}} ymm2 = ymm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
; AVX512F-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,22,22,22,22,22,22,22,22,23,23,23,23,23,23,23,23]
; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm2, %zmm0
; AVX512F-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpeqb %ymm3, %ymm2, %ymm2
; AVX512F-NEXT:    vpcmpeqb %ymm3, %ymm0, %ymm0
; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT:    vpternlogd {{.*#+}} zmm0 = mem ^ (zmm0 & (zmm1 ^ mem))
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: PR110875:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512VL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm1
; AVX512VL-NEXT:    vpbroadcastq %rdi, %ymm0
; AVX512VL-NEXT:    vpshufb {{.*#+}} ymm2 = ymm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
; AVX512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,22,22,22,22,22,22,22,22,23,23,23,23,23,23,23,23]
; AVX512VL-NEXT:    vinserti64x4 $1, %ymm0, %zmm2, %zmm0
; AVX512VL-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; AVX512VL-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpeqb %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT:    vpcmpeqb %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512VL-NEXT:    vpternlogd {{.*#+}} zmm0 = mem ^ (zmm0 & (zmm1 ^ mem))
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: PR110875:
; XOP:       # %bb.0:
; XOP-NEXT:    vmovq %rdi, %xmm2
; XOP-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; XOP-NEXT:    vpshufhw {{.*#+}} xmm3 = xmm2[0,1,2,3,4,4,5,5]
; XOP-NEXT:    vpshufhw {{.*#+}} xmm4 = xmm2[0,1,2,3,6,6,7,7]
; XOP-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
; XOP-NEXT:    vshufps {{.*#+}} ymm3 = ymm3[2,2,3,3,6,6,7,7]
; XOP-NEXT:    vpshuflw {{.*#+}} xmm4 = xmm2[0,0,1,1,4,5,6,7]
; XOP-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm2[2,2,3,3,4,5,6,7]
; XOP-NEXT:    vinsertf128 $1, %xmm2, %ymm4, %ymm2
; XOP-NEXT:    vshufps {{.*#+}} ymm2 = ymm2[0,0,1,1,4,4,5,5]
; XOP-NEXT:    vbroadcastsd {{.*#+}} ymm4 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; XOP-NEXT:    vandps %ymm4, %ymm2, %ymm2
; XOP-NEXT:    vandps %ymm4, %ymm3, %ymm3
; XOP-NEXT:    vextractf128 $1, %ymm3, %xmm4
; XOP-NEXT:    vpxor %xmm5, %xmm5, %xmm5
; XOP-NEXT:    vpcomeqb %xmm5, %xmm4, %xmm4
; XOP-NEXT:    vpcomeqb %xmm5, %xmm3, %xmm3
; XOP-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
; XOP-NEXT:    vextractf128 $1, %ymm2, %xmm4
; XOP-NEXT:    vpcomeqb %xmm5, %xmm4, %xmm4
; XOP-NEXT:    vpcomeqb %xmm5, %xmm2, %xmm2
; XOP-NEXT:    vinsertf128 $1, %xmm4, %ymm2, %ymm2
; XOP-NEXT:    vbroadcastss {{.*#+}} ymm4 = [20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20]
; XOP-NEXT:    vpcmov %ymm2, %ymm4, %ymm0, %ymm0
; XOP-NEXT:    vpcmov %ymm3, %ymm4, %ymm1, %ymm1
; XOP-NEXT:    retq
  ; Concatenate the two 32-byte inputs into a single 64-byte vector.
  %concat = shufflevector <32 x i8> %a0, <32 x i8> %a1, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
  ; Splat the scalar i64 %a2 across <8 x i64> and reinterpret it as 64 bytes.
  %scl = insertelement <1 x i64> poison, i64 %a2, i64 0
  %splat = shufflevector <1 x i64> %scl, <1 x i64> poison, <8 x i32> <i32 0, i32 poison, i32 0, i32 poison, i32 0, i32 poison, i32 0, i32 poison>
  %ref = bitcast <8 x i64> %splat to <64 x i8>
  ; Replicate selected bytes of the splat 8x each, then AND each lane with a
  ; power-of-two (1..128) so every lane isolates one distinct bit of %a2.
  %shuf = shufflevector <64 x i8> %ref, <64 x i8> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 18, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 19, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 36, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 37, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 54, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55, i32 55>
  %mask = and <64 x i8> %shuf, <i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128>
  ; Lanes whose tested bit of %a2 is clear keep the concatenated input;
  ; lanes whose bit is set take the constant 20.
  %cmp = icmp eq <64 x i8> %mask, zeroinitializer
  %res = select <64 x i1> %cmp, <64 x i8> %concat, <64 x i8> <i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20, i8 20>
  ret <64 x i8> %res
}

attributes #0 = { "no-nans-fp-math"="true" }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; AVX: {{.*}}