; Lowering tests: selects whose true or false arm is a constant (zero, all-ones,
; or a scalar FP constant) should fold the compare mask into AND/ANDN/OR or a
; blend instead of a generic vselect. The CHECK assertions below are machine
; generated; regenerate with utils/update_llc_test_checks.py, do not hand-edit.
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42 4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512F 7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512DQBW 8 9; PR28925 10 11define <4 x i32> @test1(<4 x i1> %cond, <4 x i32> %x) { 12; SSE-LABEL: test1: 13; SSE: # %bb.0: 14; SSE-NEXT: pslld $31, %xmm0 15; SSE-NEXT: psrad $31, %xmm0 16; SSE-NEXT: pandn %xmm1, %xmm0 17; SSE-NEXT: retq 18; 19; AVX-LABEL: test1: 20; AVX: # %bb.0: 21; AVX-NEXT: vpslld $31, %xmm0, %xmm0 22; AVX-NEXT: vpsrad $31, %xmm0, %xmm0 23; AVX-NEXT: vpandn %xmm1, %xmm0, %xmm0 24; AVX-NEXT: retq 25; 26; AVX512F-LABEL: test1: 27; AVX512F: # %bb.0: 28; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0 29; AVX512F-NEXT: vptestnmd %xmm0, %xmm0, %k1 30; AVX512F-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} {z} 31; AVX512F-NEXT: retq 32; 33; AVX512DQBW-LABEL: test1: 34; AVX512DQBW: # %bb.0: 35; AVX512DQBW-NEXT: vpslld $31, %xmm0, %xmm0 36; AVX512DQBW-NEXT: vpmovd2m %xmm0, %k0 37; AVX512DQBW-NEXT: knotw %k0, %k1 38; AVX512DQBW-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} {z} 39; AVX512DQBW-NEXT: retq 40 %r = select <4 x i1> %cond, <4 x i32> zeroinitializer, <4 x i32> %x 41 ret <4 x i32> %r 42} 43 44define <4 x i32> @test2(<4 x float> %a, <4 x float> %b, <4 x i32> %x) { 45; SSE-LABEL: test2: 46; SSE: # %bb.0: 47; SSE-NEXT: cmpneqps %xmm1, %xmm0 48; SSE-NEXT: andps %xmm2, %xmm0 49; SSE-NEXT: retq 50; 51; AVX-LABEL: test2: 52; AVX: # %bb.0: 53; 
AVX-NEXT: vcmpneqps %xmm1, %xmm0, %xmm0 54; AVX-NEXT: vandps %xmm2, %xmm0, %xmm0 55; AVX-NEXT: retq 56; 57; AVX512-LABEL: test2: 58; AVX512: # %bb.0: 59; AVX512-NEXT: vcmpneqps %xmm1, %xmm0, %k1 60; AVX512-NEXT: vmovdqa32 %xmm2, %xmm0 {%k1} {z} 61; AVX512-NEXT: retq 62 %cond = fcmp oeq <4 x float> %a, %b 63 %r = select <4 x i1> %cond, <4 x i32> zeroinitializer, <4 x i32> %x 64 ret <4 x i32> %r 65} 66
; Scalar FP selects against 0.0: the cmp mask should combine with andps/andnps.
67define float @fsel_zero_false_val(float %a, float %b, float %x) { 68; SSE-LABEL: fsel_zero_false_val: 69; SSE: # %bb.0: 70; SSE-NEXT: cmpeqss %xmm1, %xmm0 71; SSE-NEXT: andps %xmm2, %xmm0 72; SSE-NEXT: retq 73; 74; AVX-LABEL: fsel_zero_false_val: 75; AVX: # %bb.0: 76; AVX-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 77; AVX-NEXT: vandps %xmm2, %xmm0, %xmm0 78; AVX-NEXT: retq 79; 80; AVX512-LABEL: fsel_zero_false_val: 81; AVX512: # %bb.0: 82; AVX512-NEXT: vcmpeqss %xmm1, %xmm0, %k1 83; AVX512-NEXT: vmovss %xmm2, %xmm2, %xmm0 {%k1} {z} 84; AVX512-NEXT: retq 85 %cond = fcmp oeq float %a, %b 86 %r = select i1 %cond, float %x, float 0.0 87 ret float %r 88} 89 90define float @fsel_zero_true_val(float %a, float %b, float %x) { 91; SSE-LABEL: fsel_zero_true_val: 92; SSE: # %bb.0: 93; SSE-NEXT: cmpeqss %xmm1, %xmm0 94; SSE-NEXT: andnps %xmm2, %xmm0 95; SSE-NEXT: retq 96; 97; AVX-LABEL: fsel_zero_true_val: 98; AVX: # %bb.0: 99; AVX-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 100; AVX-NEXT: vandnps %xmm2, %xmm0, %xmm0 101; AVX-NEXT: retq 102; 103; AVX512-LABEL: fsel_zero_true_val: 104; AVX512: # %bb.0: 105; AVX512-NEXT: vcmpeqss %xmm1, %xmm0, %k1 106; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 107; AVX512-NEXT: vmovss %xmm0, %xmm2, %xmm2 {%k1} 108; AVX512-NEXT: vmovaps %xmm2, %xmm0 109; AVX512-NEXT: retq 110 %cond = fcmp oeq float %a, %b 111 %r = select i1 %cond, float 0.0, float %x 112 ret float %r 113} 114 115define double @fsel_nonzero_false_val(double %x, double %y, double %z) { 116; SSE-LABEL: fsel_nonzero_false_val: 117; SSE: # %bb.0: 118; SSE-NEXT: cmpeqsd %xmm1, %xmm0 119; SSE-NEXT: andpd 
%xmm0, %xmm2 120; SSE-NEXT: movsd {{.*#+}} xmm1 = [4.2E+1,0.0E+0] 121; SSE-NEXT: andnpd %xmm1, %xmm0 122; SSE-NEXT: orpd %xmm2, %xmm0 123; SSE-NEXT: retq 124; 125; AVX-LABEL: fsel_nonzero_false_val: 126; AVX: # %bb.0: 127; AVX-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 128; AVX-NEXT: vmovddup {{.*#+}} xmm1 = [4.2E+1,4.2E+1] 129; AVX-NEXT: # xmm1 = mem[0,0] 130; AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0 131; AVX-NEXT: retq 132; 133; AVX512-LABEL: fsel_nonzero_false_val: 134; AVX512: # %bb.0: 135; AVX512-NEXT: vcmpeqsd %xmm1, %xmm0, %k1 136; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 137; AVX512-NEXT: vmovsd %xmm2, %xmm0, %xmm0 {%k1} 138; AVX512-NEXT: retq 139 %cond = fcmp oeq double %x, %y 140 %r = select i1 %cond, double %z, double 42.0 141 ret double %r 142} 143 144define double @fsel_nonzero_true_val(double %x, double %y, double %z) { 145; SSE-LABEL: fsel_nonzero_true_val: 146; SSE: # %bb.0: 147; SSE-NEXT: cmpeqsd %xmm1, %xmm0 148; SSE-NEXT: movsd {{.*#+}} xmm1 = [4.2E+1,0.0E+0] 149; SSE-NEXT: andpd %xmm0, %xmm1 150; SSE-NEXT: andnpd %xmm2, %xmm0 151; SSE-NEXT: orpd %xmm1, %xmm0 152; SSE-NEXT: retq 153; 154; AVX-LABEL: fsel_nonzero_true_val: 155; AVX: # %bb.0: 156; AVX-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 157; AVX-NEXT: vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0 158; AVX-NEXT: retq 159; 160; AVX512-LABEL: fsel_nonzero_true_val: 161; AVX512: # %bb.0: 162; AVX512-NEXT: vcmpeqsd %xmm1, %xmm0, %k1 163; AVX512-NEXT: vmovsd {{.*#+}} xmm2 {%k1} = [4.2E+1,0.0E+0] 164; AVX512-NEXT: vmovapd %xmm2, %xmm0 165; AVX512-NEXT: retq 166 %cond = fcmp oeq double %x, %y 167 %r = select i1 %cond, double 42.0, double %z 168 ret double %r 169} 170 171define double @fsel_nonzero_constants(double %x, double %y) { 172; SSE-LABEL: fsel_nonzero_constants: 173; SSE: # %bb.0: 174; SSE-NEXT: cmpeqsd %xmm1, %xmm0 175; SSE-NEXT: movq %xmm0, %rax 176; SSE-NEXT: andl $1, %eax 177; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 178; SSE-NEXT: retq 179; 180; AVX-LABEL: 
fsel_nonzero_constants: 181; AVX: # %bb.0: 182; AVX-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 183; AVX-NEXT: vmovddup {{.*#+}} xmm1 = [4.2E+1,4.2E+1] 184; AVX-NEXT: # xmm1 = mem[0,0] 185; AVX-NEXT: vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 186; AVX-NEXT: retq 187; 188; AVX512-LABEL: fsel_nonzero_constants: 189; AVX512: # %bb.0: 190; AVX512-NEXT: vcmpeqsd %xmm1, %xmm0, %k1 191; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 192; AVX512-NEXT: vmovsd {{.*#+}} xmm0 {%k1} = [1.2E+1,0.0E+0] 193; AVX512-NEXT: retq 194 %cond = fcmp oeq double %x, %y 195 %r = select i1 %cond, double 12.0, double 42.0 196 ret double %r 197} 198 199define <2 x double> @vsel_nonzero_constants(<2 x double> %x, <2 x double> %y) { 200; SSE2-LABEL: vsel_nonzero_constants: 201; SSE2: # %bb.0: 202; SSE2-NEXT: cmplepd %xmm0, %xmm1 203; SSE2-NEXT: movsd {{.*#+}} xmm2 = [4.2E+1,0.0E+0] 204; SSE2-NEXT: movapd %xmm1, %xmm0 205; SSE2-NEXT: andnpd %xmm2, %xmm0 206; SSE2-NEXT: andpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 207; SSE2-NEXT: orpd %xmm1, %xmm0 208; SSE2-NEXT: retq 209; 210; SSE42-LABEL: vsel_nonzero_constants: 211; SSE42: # %bb.0: 212; SSE42-NEXT: cmplepd %xmm0, %xmm1 213; SSE42-NEXT: movsd {{.*#+}} xmm2 = [4.2E+1,0.0E+0] 214; SSE42-NEXT: movapd %xmm1, %xmm0 215; SSE42-NEXT: blendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 216; SSE42-NEXT: movapd %xmm2, %xmm0 217; SSE42-NEXT: retq 218; 219; AVX-LABEL: vsel_nonzero_constants: 220; AVX: # %bb.0: 221; AVX-NEXT: vcmplepd %xmm0, %xmm1, %xmm0 222; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [4.2E+1,0.0E+0] 223; AVX-NEXT: vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 224; AVX-NEXT: retq 225; 226; AVX512-LABEL: vsel_nonzero_constants: 227; AVX512: # %bb.0: 228; AVX512-NEXT: vcmplepd %xmm0, %xmm1, %k1 229; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 230; AVX512-NEXT: vmovapd {{.*#+}} xmm0 {%k1} = [1.2E+1,-1.0E+0] 231; AVX512-NEXT: retq 232 %cond = fcmp oge <2 x double> %x, %y 233 %r = select <2 x i1> %cond, <2 x double> 
<double 12.0, double -1.0>, <2 x double> <double 42.0, double 0.0> 234 ret <2 x double> %r 235} 236
; signbit_mask_*: "icmp slt %a, 0" + "select(cond, %b, 0)" should lower to a
; sign-bit mask (psrad/psraw/pcmpgt-with-zero) ANDed with %b.
237define <16 x i8> @signbit_mask_v16i8(<16 x i8> %a, <16 x i8> %b) { 238; SSE-LABEL: signbit_mask_v16i8: 239; SSE: # %bb.0: 240; SSE-NEXT: pxor %xmm2, %xmm2 241; SSE-NEXT: pcmpgtb %xmm0, %xmm2 242; SSE-NEXT: pand %xmm1, %xmm2 243; SSE-NEXT: movdqa %xmm2, %xmm0 244; SSE-NEXT: retq 245; 246; AVX-LABEL: signbit_mask_v16i8: 247; AVX: # %bb.0: 248; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2 249; AVX-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0 250; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 251; AVX-NEXT: retq 252; 253; AVX512-LABEL: signbit_mask_v16i8: 254; AVX512: # %bb.0: 255; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2 256; AVX512-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0 257; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 258; AVX512-NEXT: retq 259 %cond = icmp slt <16 x i8> %a, zeroinitializer 260 %r = select <16 x i1> %cond, <16 x i8> %b, <16 x i8> zeroinitializer 261 ret <16 x i8> %r 262} 263 264define <8 x i16> @signbit_mask_v8i16(<8 x i16> %a, <8 x i16> %b) { 265; SSE-LABEL: signbit_mask_v8i16: 266; SSE: # %bb.0: 267; SSE-NEXT: psraw $15, %xmm0 268; SSE-NEXT: pand %xmm1, %xmm0 269; SSE-NEXT: retq 270; 271; AVX-LABEL: signbit_mask_v8i16: 272; AVX: # %bb.0: 273; AVX-NEXT: vpsraw $15, %xmm0, %xmm0 274; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 275; AVX-NEXT: retq 276; 277; AVX512-LABEL: signbit_mask_v8i16: 278; AVX512: # %bb.0: 279; AVX512-NEXT: vpsraw $15, %xmm0, %xmm0 280; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 281; AVX512-NEXT: retq 282 %cond = icmp slt <8 x i16> %a, zeroinitializer 283 %r = select <8 x i1> %cond, <8 x i16> %b, <8 x i16> zeroinitializer 284 ret <8 x i16> %r 285} 286 287define <4 x i32> @signbit_mask_v4i32(<4 x i32> %a, <4 x i32> %b) { 288; SSE-LABEL: signbit_mask_v4i32: 289; SSE: # %bb.0: 290; SSE-NEXT: psrad $31, %xmm0 291; SSE-NEXT: pand %xmm1, %xmm0 292; SSE-NEXT: retq 293; 294; AVX-LABEL: signbit_mask_v4i32: 295; AVX: # %bb.0: 296; AVX-NEXT: vpsrad $31, %xmm0, %xmm0 297; AVX-NEXT: vpand 
%xmm1, %xmm0, %xmm0 298; AVX-NEXT: retq 299; 300; AVX512-LABEL: signbit_mask_v4i32: 301; AVX512: # %bb.0: 302; AVX512-NEXT: vpsrad $31, %xmm0, %xmm0 303; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 304; AVX512-NEXT: retq 305 %cond = icmp slt <4 x i32> %a, zeroinitializer 306 %r = select <4 x i1> %cond, <4 x i32> %b, <4 x i32> zeroinitializer 307 ret <4 x i32> %r 308} 309 310define <2 x i64> @signbit_mask_v2i64(<2 x i64> %a, <2 x i64> %b) { 311; SSE2-LABEL: signbit_mask_v2i64: 312; SSE2: # %bb.0: 313; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 314; SSE2-NEXT: psrad $31, %xmm0 315; SSE2-NEXT: pand %xmm1, %xmm0 316; SSE2-NEXT: retq 317; 318; SSE42-LABEL: signbit_mask_v2i64: 319; SSE42: # %bb.0: 320; SSE42-NEXT: pxor %xmm2, %xmm2 321; SSE42-NEXT: pcmpgtq %xmm0, %xmm2 322; SSE42-NEXT: pand %xmm1, %xmm2 323; SSE42-NEXT: movdqa %xmm2, %xmm0 324; SSE42-NEXT: retq 325; 326; AVX-LABEL: signbit_mask_v2i64: 327; AVX: # %bb.0: 328; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2 329; AVX-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm0 330; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 331; AVX-NEXT: retq 332; 333; AVX512-LABEL: signbit_mask_v2i64: 334; AVX512: # %bb.0: 335; AVX512-NEXT: vpsraq $63, %xmm0, %xmm0 336; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 337; AVX512-NEXT: retq 338 %cond = icmp slt <2 x i64> %a, zeroinitializer 339 %r = select <2 x i1> %cond, <2 x i64> %b, <2 x i64> zeroinitializer 340 ret <2 x i64> %r 341} 342 343; Swap cmp pred and select ops. This is logically equivalent to the above test. 
; Swapped-predicate and 256-bit variants of the signbit_mask tests, followed by
; the signbit_setmask family (select of all-ones vs. %b). Assertions are
; autogenerated; regenerate with utils/update_llc_test_checks.py.
344 345define <2 x i64> @signbit_mask_swap_v2i64(<2 x i64> %a, <2 x i64> %b) { 346; SSE2-LABEL: signbit_mask_swap_v2i64: 347; SSE2: # %bb.0: 348; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 349; SSE2-NEXT: psrad $31, %xmm0 350; SSE2-NEXT: pand %xmm1, %xmm0 351; SSE2-NEXT: retq 352; 353; SSE42-LABEL: signbit_mask_swap_v2i64: 354; SSE42: # %bb.0: 355; SSE42-NEXT: pxor %xmm2, %xmm2 356; SSE42-NEXT: pcmpgtq %xmm0, %xmm2 357; SSE42-NEXT: pand %xmm1, %xmm2 358; SSE42-NEXT: movdqa %xmm2, %xmm0 359; SSE42-NEXT: retq 360; 361; AVX-LABEL: signbit_mask_swap_v2i64: 362; AVX: # %bb.0: 363; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2 364; AVX-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm0 365; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 366; AVX-NEXT: retq 367; 368; AVX512-LABEL: signbit_mask_swap_v2i64: 369; AVX512: # %bb.0: 370; AVX512-NEXT: vpsraq $63, %xmm0, %xmm0 371; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 372; AVX512-NEXT: retq 373 %cond = icmp sgt <2 x i64> %a, <i64 -1, i64 -1> 374 %r = select <2 x i1> %cond, <2 x i64> zeroinitializer, <2 x i64> %b 375 ret <2 x i64> %r 376} 377 378define <32 x i8> @signbit_mask_v32i8(<32 x i8> %a, <32 x i8> %b) { 379; SSE-LABEL: signbit_mask_v32i8: 380; SSE: # %bb.0: 381; SSE-NEXT: pxor %xmm4, %xmm4 382; SSE-NEXT: pxor %xmm5, %xmm5 383; SSE-NEXT: pcmpgtb %xmm0, %xmm5 384; SSE-NEXT: pand %xmm2, %xmm5 385; SSE-NEXT: pcmpgtb %xmm1, %xmm4 386; SSE-NEXT: pand %xmm3, %xmm4 387; SSE-NEXT: movdqa %xmm5, %xmm0 388; SSE-NEXT: movdqa %xmm4, %xmm1 389; SSE-NEXT: retq 390; 391; AVX1-LABEL: signbit_mask_v32i8: 392; AVX1: # %bb.0: 393; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 394; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 395; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2 396; AVX1-NEXT: vpcmpgtb %xmm0, %xmm3, %xmm0 397; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 398; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0 399; AVX1-NEXT: retq 400; 401; AVX2-LABEL: signbit_mask_v32i8: 402; AVX2: # %bb.0: 403; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 404; AVX2-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0 405; 
AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 406; AVX2-NEXT: retq 407; 408; AVX512-LABEL: signbit_mask_v32i8: 409; AVX512: # %bb.0: 410; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2 411; AVX512-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0 412; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0 413; AVX512-NEXT: retq 414 %cond = icmp slt <32 x i8> %a, zeroinitializer 415 %r = select <32 x i1> %cond, <32 x i8> %b, <32 x i8> zeroinitializer 416 ret <32 x i8> %r 417} 418 419define <16 x i16> @signbit_mask_v16i16(<16 x i16> %a, <16 x i16> %b) { 420; SSE-LABEL: signbit_mask_v16i16: 421; SSE: # %bb.0: 422; SSE-NEXT: psraw $15, %xmm0 423; SSE-NEXT: pand %xmm2, %xmm0 424; SSE-NEXT: psraw $15, %xmm1 425; SSE-NEXT: pand %xmm3, %xmm1 426; SSE-NEXT: retq 427; 428; AVX1-LABEL: signbit_mask_v16i16: 429; AVX1: # %bb.0: 430; AVX1-NEXT: vpsraw $15, %xmm0, %xmm2 431; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 432; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0 433; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 434; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0 435; AVX1-NEXT: retq 436; 437; AVX2-LABEL: signbit_mask_v16i16: 438; AVX2: # %bb.0: 439; AVX2-NEXT: vpsraw $15, %ymm0, %ymm0 440; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 441; AVX2-NEXT: retq 442; 443; AVX512-LABEL: signbit_mask_v16i16: 444; AVX512: # %bb.0: 445; AVX512-NEXT: vpsraw $15, %ymm0, %ymm0 446; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0 447; AVX512-NEXT: retq 448 %cond = icmp slt <16 x i16> %a, zeroinitializer 449 %r = select <16 x i1> %cond, <16 x i16> %b, <16 x i16> zeroinitializer 450 ret <16 x i16> %r 451} 452 453define <8 x i32> @signbit_mask_v8i32(<8 x i32> %a, <8 x i32> %b) { 454; SSE-LABEL: signbit_mask_v8i32: 455; SSE: # %bb.0: 456; SSE-NEXT: psrad $31, %xmm0 457; SSE-NEXT: pand %xmm2, %xmm0 458; SSE-NEXT: psrad $31, %xmm1 459; SSE-NEXT: pand %xmm3, %xmm1 460; SSE-NEXT: retq 461; 462; AVX1-LABEL: signbit_mask_v8i32: 463; AVX1: # %bb.0: 464; AVX1-NEXT: vpsrad $31, %xmm0, %xmm2 465; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 466; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0 467; 
AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 468; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0 469; AVX1-NEXT: retq 470; 471; AVX2-LABEL: signbit_mask_v8i32: 472; AVX2: # %bb.0: 473; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0 474; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 475; AVX2-NEXT: retq 476; 477; AVX512-LABEL: signbit_mask_v8i32: 478; AVX512: # %bb.0: 479; AVX512-NEXT: vpsrad $31, %ymm0, %ymm0 480; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0 481; AVX512-NEXT: retq 482 %cond = icmp slt <8 x i32> %a, zeroinitializer 483 %r = select <8 x i1> %cond, <8 x i32> %b, <8 x i32> zeroinitializer 484 ret <8 x i32> %r 485} 486 487; Swap cmp pred and select ops. This is logically equivalent to the above test. 488 489define <8 x i32> @signbit_mask_swap_v8i32(<8 x i32> %a, <8 x i32> %b) { 490; SSE-LABEL: signbit_mask_swap_v8i32: 491; SSE: # %bb.0: 492; SSE-NEXT: psrad $31, %xmm0 493; SSE-NEXT: pand %xmm2, %xmm0 494; SSE-NEXT: psrad $31, %xmm1 495; SSE-NEXT: pand %xmm3, %xmm1 496; SSE-NEXT: retq 497; 498; AVX1-LABEL: signbit_mask_swap_v8i32: 499; AVX1: # %bb.0: 500; AVX1-NEXT: vpsrad $31, %xmm0, %xmm2 501; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 502; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0 503; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 504; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0 505; AVX1-NEXT: retq 506; 507; AVX2-LABEL: signbit_mask_swap_v8i32: 508; AVX2: # %bb.0: 509; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0 510; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 511; AVX2-NEXT: retq 512; 513; AVX512-LABEL: signbit_mask_swap_v8i32: 514; AVX512: # %bb.0: 515; AVX512-NEXT: vpsrad $31, %ymm0, %ymm0 516; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0 517; AVX512-NEXT: retq 518 %cond = icmp sgt <8 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> 519 %r = select <8 x i1> %cond, <8 x i32> zeroinitializer, <8 x i32> %b 520 ret <8 x i32> %r 521} 522 523define <4 x i64> @signbit_mask_v4i64(<4 x i64> %a, <4 x i64> %b) { 524; SSE2-LABEL: signbit_mask_v4i64: 525; SSE2: # %bb.0: 526; SSE2-NEXT: pshufd 
{{.*#+}} xmm0 = xmm0[1,1,3,3] 527; SSE2-NEXT: psrad $31, %xmm0 528; SSE2-NEXT: pand %xmm2, %xmm0 529; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 530; SSE2-NEXT: psrad $31, %xmm1 531; SSE2-NEXT: pand %xmm3, %xmm1 532; SSE2-NEXT: retq 533; 534; SSE42-LABEL: signbit_mask_v4i64: 535; SSE42: # %bb.0: 536; SSE42-NEXT: pxor %xmm4, %xmm4 537; SSE42-NEXT: pxor %xmm5, %xmm5 538; SSE42-NEXT: pcmpgtq %xmm0, %xmm5 539; SSE42-NEXT: pand %xmm2, %xmm5 540; SSE42-NEXT: pcmpgtq %xmm1, %xmm4 541; SSE42-NEXT: pand %xmm3, %xmm4 542; SSE42-NEXT: movdqa %xmm5, %xmm0 543; SSE42-NEXT: movdqa %xmm4, %xmm1 544; SSE42-NEXT: retq 545; 546; AVX1-LABEL: signbit_mask_v4i64: 547; AVX1: # %bb.0: 548; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 549; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 550; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 551; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0 552; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 553; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0 554; AVX1-NEXT: retq 555; 556; AVX2-LABEL: signbit_mask_v4i64: 557; AVX2: # %bb.0: 558; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 559; AVX2-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm0 560; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 561; AVX2-NEXT: retq 562; 563; AVX512-LABEL: signbit_mask_v4i64: 564; AVX512: # %bb.0: 565; AVX512-NEXT: vpsraq $63, %ymm0, %ymm0 566; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0 567; AVX512-NEXT: retq 568 %cond = icmp slt <4 x i64> %a, zeroinitializer 569 %r = select <4 x i1> %cond, <4 x i64> %b, <4 x i64> zeroinitializer 570 ret <4 x i64> %r 571} 572
; signbit_setmask_*: "icmp slt %a, 0" + "select(cond, -1, %b)" should lower to
; a sign-bit mask ORed with %b.
573define <16 x i8> @signbit_setmask_v16i8(<16 x i8> %a, <16 x i8> %b) { 574; SSE-LABEL: signbit_setmask_v16i8: 575; SSE: # %bb.0: 576; SSE-NEXT: pxor %xmm2, %xmm2 577; SSE-NEXT: pcmpgtb %xmm0, %xmm2 578; SSE-NEXT: por %xmm1, %xmm2 579; SSE-NEXT: movdqa %xmm2, %xmm0 580; SSE-NEXT: retq 581; 582; AVX-LABEL: signbit_setmask_v16i8: 583; AVX: # %bb.0: 584; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2 585; AVX-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0 586; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 587; 
AVX-NEXT: retq 588; 589; AVX512-LABEL: signbit_setmask_v16i8: 590; AVX512: # %bb.0: 591; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2 592; AVX512-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0 593; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 594; AVX512-NEXT: retq 595 %cond = icmp slt <16 x i8> %a, zeroinitializer 596 %r = select <16 x i1> %cond, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %b 597 ret <16 x i8> %r 598} 599 600; Swap cmp pred and select ops. This is logically equivalent to the above test. 601 602define <16 x i8> @signbit_setmask_swap_v16i8(<16 x i8> %a, <16 x i8> %b) { 603; SSE-LABEL: signbit_setmask_swap_v16i8: 604; SSE: # %bb.0: 605; SSE-NEXT: pxor %xmm2, %xmm2 606; SSE-NEXT: pcmpgtb %xmm0, %xmm2 607; SSE-NEXT: por %xmm1, %xmm2 608; SSE-NEXT: movdqa %xmm2, %xmm0 609; SSE-NEXT: retq 610; 611; AVX-LABEL: signbit_setmask_swap_v16i8: 612; AVX: # %bb.0: 613; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2 614; AVX-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0 615; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 616; AVX-NEXT: retq 617; 618; AVX512-LABEL: signbit_setmask_swap_v16i8: 619; AVX512: # %bb.0: 620; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2 621; AVX512-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0 622; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 623; AVX512-NEXT: retq 624 %cond = icmp sgt <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 625 %r = select <16 x i1> %cond, <16 x i8> %b, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 626 ret <16 x i8> %r 627} 628 629define <8 x i16> @signbit_setmask_v8i16(<8 x i16> %a, <8 x i16> %b) { 630; SSE-LABEL: signbit_setmask_v8i16: 631; SSE: # %bb.0: 632; SSE-NEXT: psraw $15, %xmm0 633; SSE-NEXT: por %xmm1, %xmm0 634; SSE-NEXT: retq 635; 636; AVX-LABEL: signbit_setmask_v8i16: 637; AVX: # %bb.0: 638; AVX-NEXT: vpsraw $15, %xmm0, %xmm0 639; 
AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 640; AVX-NEXT: retq 641; 642; AVX512-LABEL: signbit_setmask_v8i16: 643; AVX512: # %bb.0: 644; AVX512-NEXT: vpsraw $15, %xmm0, %xmm0 645; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 646; AVX512-NEXT: retq 647 %cond = icmp slt <8 x i16> %a, zeroinitializer 648 %r = select <8 x i1> %cond, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %b 649 ret <8 x i16> %r 650} 651 652define <4 x i32> @signbit_setmask_v4i32(<4 x i32> %a, <4 x i32> %b) { 653; SSE-LABEL: signbit_setmask_v4i32: 654; SSE: # %bb.0: 655; SSE-NEXT: psrad $31, %xmm0 656; SSE-NEXT: por %xmm1, %xmm0 657; SSE-NEXT: retq 658; 659; AVX-LABEL: signbit_setmask_v4i32: 660; AVX: # %bb.0: 661; AVX-NEXT: vpsrad $31, %xmm0, %xmm0 662; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 663; AVX-NEXT: retq 664; 665; AVX512-LABEL: signbit_setmask_v4i32: 666; AVX512: # %bb.0: 667; AVX512-NEXT: vpsrad $31, %xmm0, %xmm0 668; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 669; AVX512-NEXT: retq 670 %cond = icmp slt <4 x i32> %a, zeroinitializer 671 %r = select <4 x i1> %cond, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %b 672 ret <4 x i32> %r 673} 674 675define <2 x i64> @signbit_setmask_v2i64(<2 x i64> %a, <2 x i64> %b) { 676; SSE2-LABEL: signbit_setmask_v2i64: 677; SSE2: # %bb.0: 678; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 679; SSE2-NEXT: psrad $31, %xmm0 680; SSE2-NEXT: por %xmm1, %xmm0 681; SSE2-NEXT: retq 682; 683; SSE42-LABEL: signbit_setmask_v2i64: 684; SSE42: # %bb.0: 685; SSE42-NEXT: pxor %xmm2, %xmm2 686; SSE42-NEXT: pcmpgtq %xmm0, %xmm2 687; SSE42-NEXT: por %xmm1, %xmm2 688; SSE42-NEXT: movdqa %xmm2, %xmm0 689; SSE42-NEXT: retq 690; 691; AVX-LABEL: signbit_setmask_v2i64: 692; AVX: # %bb.0: 693; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2 694; AVX-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm0 695; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 696; AVX-NEXT: retq 697; 698; AVX512-LABEL: signbit_setmask_v2i64: 699; AVX512: # %bb.0: 700; AVX512-NEXT: vpsraq $63, %xmm0, %xmm0 701; 
AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 702; AVX512-NEXT: retq 703 %cond = icmp slt <2 x i64> %a, zeroinitializer 704 %r = select <2 x i1> %cond, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %b 705 ret <2 x i64> %r 706} 707 708define <32 x i8> @signbit_setmask_v32i8(<32 x i8> %a, <32 x i8> %b) { 709; SSE-LABEL: signbit_setmask_v32i8: 710; SSE: # %bb.0: 711; SSE-NEXT: pxor %xmm4, %xmm4 712; SSE-NEXT: pxor %xmm5, %xmm5 713; SSE-NEXT: pcmpgtb %xmm0, %xmm5 714; SSE-NEXT: por %xmm2, %xmm5 715; SSE-NEXT: pcmpgtb %xmm1, %xmm4 716; SSE-NEXT: por %xmm3, %xmm4 717; SSE-NEXT: movdqa %xmm5, %xmm0 718; SSE-NEXT: movdqa %xmm4, %xmm1 719; SSE-NEXT: retq 720; 721; AVX1-LABEL: signbit_setmask_v32i8: 722; AVX1: # %bb.0: 723; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 724; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 725; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2 726; AVX1-NEXT: vpcmpgtb %xmm0, %xmm3, %xmm0 727; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 728; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 729; AVX1-NEXT: retq 730; 731; AVX2-LABEL: signbit_setmask_v32i8: 732; AVX2: # %bb.0: 733; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 734; AVX2-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0 735; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 736; AVX2-NEXT: retq 737; 738; AVX512-LABEL: signbit_setmask_v32i8: 739; AVX512: # %bb.0: 740; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2 741; AVX512-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0 742; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 743; AVX512-NEXT: retq 744 %cond = icmp slt <32 x i8> %a, zeroinitializer 745 %r = select <32 x i1> %cond, <32 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <32 x i8> %b 746 ret <32 x i8> %r 747} 748 749define <16 x i16> @signbit_setmask_v16i16(<16 x i16> %a, <16 x i16> %b) { 750; SSE-LABEL: signbit_setmask_v16i16: 751; SSE: # %bb.0: 752; SSE-NEXT: psraw $15, %xmm0 753; SSE-NEXT: por %xmm2, %xmm0 
754; SSE-NEXT: psraw $15, %xmm1 755; SSE-NEXT: por %xmm3, %xmm1 756; SSE-NEXT: retq 757; 758; AVX1-LABEL: signbit_setmask_v16i16: 759; AVX1: # %bb.0: 760; AVX1-NEXT: vpsraw $15, %xmm0, %xmm2 761; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 762; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0 763; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 764; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 765; AVX1-NEXT: retq 766; 767; AVX2-LABEL: signbit_setmask_v16i16: 768; AVX2: # %bb.0: 769; AVX2-NEXT: vpsraw $15, %ymm0, %ymm0 770; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 771; AVX2-NEXT: retq 772; 773; AVX512-LABEL: signbit_setmask_v16i16: 774; AVX512: # %bb.0: 775; AVX512-NEXT: vpsraw $15, %ymm0, %ymm0 776; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 777; AVX512-NEXT: retq 778 %cond = icmp slt <16 x i16> %a, zeroinitializer 779 %r = select <16 x i1> %cond, <16 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <16 x i16> %b 780 ret <16 x i16> %r 781} 782 783define <8 x i32> @signbit_setmask_v8i32(<8 x i32> %a, <8 x i32> %b) { 784; SSE-LABEL: signbit_setmask_v8i32: 785; SSE: # %bb.0: 786; SSE-NEXT: psrad $31, %xmm0 787; SSE-NEXT: por %xmm2, %xmm0 788; SSE-NEXT: psrad $31, %xmm1 789; SSE-NEXT: por %xmm3, %xmm1 790; SSE-NEXT: retq 791; 792; AVX1-LABEL: signbit_setmask_v8i32: 793; AVX1: # %bb.0: 794; AVX1-NEXT: vpsrad $31, %xmm0, %xmm2 795; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 796; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0 797; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 798; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 799; AVX1-NEXT: retq 800; 801; AVX2-LABEL: signbit_setmask_v8i32: 802; AVX2: # %bb.0: 803; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0 804; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 805; AVX2-NEXT: retq 806; 807; AVX512-LABEL: signbit_setmask_v8i32: 808; AVX512: # %bb.0: 809; AVX512-NEXT: vpsrad $31, %ymm0, %ymm0 810; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 811; AVX512-NEXT: retq 812 %cond = icmp slt <8 x i32> %a, zeroinitializer 813 
%r = select <8 x i1> %cond, <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <8 x i32> %b 814 ret <8 x i32> %r 815} 816 817define <4 x i64> @signbit_setmask_v4i64(<4 x i64> %a, <4 x i64> %b) { 818; SSE2-LABEL: signbit_setmask_v4i64: 819; SSE2: # %bb.0: 820; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 821; SSE2-NEXT: psrad $31, %xmm0 822; SSE2-NEXT: por %xmm2, %xmm0 823; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 824; SSE2-NEXT: psrad $31, %xmm1 825; SSE2-NEXT: por %xmm3, %xmm1 826; SSE2-NEXT: retq 827; 828; SSE42-LABEL: signbit_setmask_v4i64: 829; SSE42: # %bb.0: 830; SSE42-NEXT: pxor %xmm4, %xmm4 831; SSE42-NEXT: pxor %xmm5, %xmm5 832; SSE42-NEXT: pcmpgtq %xmm0, %xmm5 833; SSE42-NEXT: por %xmm2, %xmm5 834; SSE42-NEXT: pcmpgtq %xmm1, %xmm4 835; SSE42-NEXT: por %xmm3, %xmm4 836; SSE42-NEXT: movdqa %xmm5, %xmm0 837; SSE42-NEXT: movdqa %xmm4, %xmm1 838; SSE42-NEXT: retq 839; 840; AVX1-LABEL: signbit_setmask_v4i64: 841; AVX1: # %bb.0: 842; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 843; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 844; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 845; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0 846; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 847; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 848; AVX1-NEXT: retq 849; 850; AVX2-LABEL: signbit_setmask_v4i64: 851; AVX2: # %bb.0: 852; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 853; AVX2-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm0 854; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 855; AVX2-NEXT: retq 856; 857; AVX512-LABEL: signbit_setmask_v4i64: 858; AVX512: # %bb.0: 859; AVX512-NEXT: vpsraq $63, %ymm0, %ymm0 860; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 861; AVX512-NEXT: retq 862 %cond = icmp slt <4 x i64> %a, zeroinitializer 863 %r = select <4 x i1> %cond, <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>, <4 x i64> %b 864 ret <4 x i64> %r 865} 866 867; Swap cmp pred and select ops. This is logically equivalent to the above test. 
; Swapped-predicate form of the v4i64 setmask test, then the not_signbit_mask
; family. Assertions are autogenerated; regenerate with
; utils/update_llc_test_checks.py.
868 869define <4 x i64> @signbit_setmask_swap_v4i64(<4 x i64> %a, <4 x i64> %b) { 870; SSE2-LABEL: signbit_setmask_swap_v4i64: 871; SSE2: # %bb.0: 872; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 873; SSE2-NEXT: psrad $31, %xmm0 874; SSE2-NEXT: por %xmm2, %xmm0 875; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 876; SSE2-NEXT: psrad $31, %xmm1 877; SSE2-NEXT: por %xmm3, %xmm1 878; SSE2-NEXT: retq 879; 880; SSE42-LABEL: signbit_setmask_swap_v4i64: 881; SSE42: # %bb.0: 882; SSE42-NEXT: pxor %xmm4, %xmm4 883; SSE42-NEXT: pxor %xmm5, %xmm5 884; SSE42-NEXT: pcmpgtq %xmm0, %xmm5 885; SSE42-NEXT: por %xmm2, %xmm5 886; SSE42-NEXT: pcmpgtq %xmm1, %xmm4 887; SSE42-NEXT: por %xmm3, %xmm4 888; SSE42-NEXT: movdqa %xmm5, %xmm0 889; SSE42-NEXT: movdqa %xmm4, %xmm1 890; SSE42-NEXT: retq 891; 892; AVX1-LABEL: signbit_setmask_swap_v4i64: 893; AVX1: # %bb.0: 894; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 895; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 896; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 897; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0 898; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 899; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 900; AVX1-NEXT: retq 901; 902; AVX2-LABEL: signbit_setmask_swap_v4i64: 903; AVX2: # %bb.0: 904; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 905; AVX2-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm0 906; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 907; AVX2-NEXT: retq 908; 909; AVX512-LABEL: signbit_setmask_swap_v4i64: 910; AVX512: # %bb.0: 911; AVX512-NEXT: vpsraq $63, %ymm0, %ymm0 912; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 913; AVX512-NEXT: retq 914 %cond = icmp sgt <4 x i64> %a, <i64 -1, i64 -1, i64 -1, i64 -1> 915 %r = select <4 x i1> %cond, <4 x i64> %b, <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1> 916 ret <4 x i64> %r 917} 918
; not_signbit_mask_*: "icmp sgt %a, -1" + "select(cond, %b, 0)" should mask %b
; with the inverted sign bit (pcmpgt against all-ones, or andn of a sign mask).
919define <16 x i8> @not_signbit_mask_v16i8(<16 x i8> %a, <16 x i8> %b) { 920; SSE-LABEL: not_signbit_mask_v16i8: 921; SSE: # %bb.0: 922; SSE-NEXT: pcmpeqd %xmm2, %xmm2 923; SSE-NEXT: pcmpgtb %xmm2, %xmm0 924; SSE-NEXT: pand %xmm1, %xmm0 925; SSE-NEXT: retq 
926; 927; AVX-LABEL: not_signbit_mask_v16i8: 928; AVX: # %bb.0: 929; AVX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 930; AVX-NEXT: vpcmpgtb %xmm2, %xmm0, %xmm0 931; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 932; AVX-NEXT: retq 933; 934; AVX512-LABEL: not_signbit_mask_v16i8: 935; AVX512: # %bb.0: 936; AVX512-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 937; AVX512-NEXT: vpcmpgtb %xmm2, %xmm0, %xmm0 938; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 939; AVX512-NEXT: retq 940 %cond = icmp sgt <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 941 %r = select <16 x i1> %cond, <16 x i8> %b, <16 x i8> zeroinitializer 942 ret <16 x i8> %r 943} 944 945define <8 x i16> @not_signbit_mask_v8i16(<8 x i16> %a, <8 x i16> %b) { 946; SSE-LABEL: not_signbit_mask_v8i16: 947; SSE: # %bb.0: 948; SSE-NEXT: psraw $15, %xmm0 949; SSE-NEXT: pandn %xmm1, %xmm0 950; SSE-NEXT: retq 951; 952; AVX-LABEL: not_signbit_mask_v8i16: 953; AVX: # %bb.0: 954; AVX-NEXT: vpsraw $15, %xmm0, %xmm0 955; AVX-NEXT: vpandn %xmm1, %xmm0, %xmm0 956; AVX-NEXT: retq 957; 958; AVX512-LABEL: not_signbit_mask_v8i16: 959; AVX512: # %bb.0: 960; AVX512-NEXT: vpsraw $15, %xmm0, %xmm0 961; AVX512-NEXT: vpandn %xmm1, %xmm0, %xmm0 962; AVX512-NEXT: retq 963 %cond = icmp sgt <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> 964 %r = select <8 x i1> %cond, <8 x i16> %b, <8 x i16> zeroinitializer 965 ret <8 x i16> %r 966} 967 968; Swap cmp pred and select ops. This is logically equivalent to the above test. 
969 970define <8 x i16> @not_signbit_mask_swap_v8i16(<8 x i16> %a, <8 x i16> %b) { 971; SSE-LABEL: not_signbit_mask_swap_v8i16: 972; SSE: # %bb.0: 973; SSE-NEXT: psraw $15, %xmm0 974; SSE-NEXT: pandn %xmm1, %xmm0 975; SSE-NEXT: retq 976; 977; AVX-LABEL: not_signbit_mask_swap_v8i16: 978; AVX: # %bb.0: 979; AVX-NEXT: vpsraw $15, %xmm0, %xmm0 980; AVX-NEXT: vpandn %xmm1, %xmm0, %xmm0 981; AVX-NEXT: retq 982; 983; AVX512-LABEL: not_signbit_mask_swap_v8i16: 984; AVX512: # %bb.0: 985; AVX512-NEXT: vpsraw $15, %xmm0, %xmm0 986; AVX512-NEXT: vpandn %xmm1, %xmm0, %xmm0 987; AVX512-NEXT: retq 988 %cond = icmp slt <8 x i16> %a, zeroinitializer 989 %r = select <8 x i1> %cond, <8 x i16> zeroinitializer, <8 x i16> %b 990 ret <8 x i16> %r 991} 992 993define <4 x i32> @not_signbit_mask_v4i32(<4 x i32> %a, <4 x i32> %b) { 994; SSE-LABEL: not_signbit_mask_v4i32: 995; SSE: # %bb.0: 996; SSE-NEXT: psrad $31, %xmm0 997; SSE-NEXT: pandn %xmm1, %xmm0 998; SSE-NEXT: retq 999; 1000; AVX-LABEL: not_signbit_mask_v4i32: 1001; AVX: # %bb.0: 1002; AVX-NEXT: vpsrad $31, %xmm0, %xmm0 1003; AVX-NEXT: vpandn %xmm1, %xmm0, %xmm0 1004; AVX-NEXT: retq 1005; 1006; AVX512-LABEL: not_signbit_mask_v4i32: 1007; AVX512: # %bb.0: 1008; AVX512-NEXT: vpsrad $31, %xmm0, %xmm0 1009; AVX512-NEXT: vpandn %xmm1, %xmm0, %xmm0 1010; AVX512-NEXT: retq 1011 %cond = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1> 1012 %r = select <4 x i1> %cond, <4 x i32> %b, <4 x i32> zeroinitializer 1013 ret <4 x i32> %r 1014} 1015 1016define <2 x i64> @not_signbit_mask_v2i64(<2 x i64> %a, <2 x i64> %b) { 1017; SSE2-LABEL: not_signbit_mask_v2i64: 1018; SSE2: # %bb.0: 1019; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 1020; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 1021; SSE2-NEXT: pcmpgtd %xmm2, %xmm0 1022; SSE2-NEXT: pand %xmm1, %xmm0 1023; SSE2-NEXT: retq 1024; 1025; SSE42-LABEL: not_signbit_mask_v2i64: 1026; SSE42: # %bb.0: 1027; SSE42-NEXT: pcmpeqd %xmm2, %xmm2 1028; SSE42-NEXT: pcmpgtq %xmm2, %xmm0 1029; SSE42-NEXT: pand %xmm1, 
%xmm0 1030; SSE42-NEXT: retq 1031; 1032; AVX-LABEL: not_signbit_mask_v2i64: 1033; AVX: # %bb.0: 1034; AVX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 1035; AVX-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm0 1036; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 1037; AVX-NEXT: retq 1038; 1039; AVX512-LABEL: not_signbit_mask_v2i64: 1040; AVX512: # %bb.0: 1041; AVX512-NEXT: vpsraq $63, %xmm0, %xmm0 1042; AVX512-NEXT: vpandn %xmm1, %xmm0, %xmm0 1043; AVX512-NEXT: retq 1044 %cond = icmp sgt <2 x i64> %a, <i64 -1, i64 -1> 1045 %r = select <2 x i1> %cond, <2 x i64> %b, <2 x i64> zeroinitializer 1046 ret <2 x i64> %r 1047} 1048 1049define <32 x i8> @not_signbit_mask_v32i8(<32 x i8> %a, <32 x i8> %b) { 1050; SSE-LABEL: not_signbit_mask_v32i8: 1051; SSE: # %bb.0: 1052; SSE-NEXT: pcmpeqd %xmm4, %xmm4 1053; SSE-NEXT: pcmpgtb %xmm4, %xmm0 1054; SSE-NEXT: pand %xmm2, %xmm0 1055; SSE-NEXT: pcmpgtb %xmm4, %xmm1 1056; SSE-NEXT: pand %xmm3, %xmm1 1057; SSE-NEXT: retq 1058; 1059; AVX1-LABEL: not_signbit_mask_v32i8: 1060; AVX1: # %bb.0: 1061; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1062; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 1063; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2 1064; AVX1-NEXT: vpcmpgtb %xmm0, %xmm3, %xmm0 1065; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1066; AVX1-NEXT: vandnps %ymm1, %ymm0, %ymm0 1067; AVX1-NEXT: retq 1068; 1069; AVX2-LABEL: not_signbit_mask_v32i8: 1070; AVX2: # %bb.0: 1071; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 1072; AVX2-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0 1073; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 1074; AVX2-NEXT: retq 1075; 1076; AVX512-LABEL: not_signbit_mask_v32i8: 1077; AVX512: # %bb.0: 1078; AVX512-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 1079; AVX512-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0 1080; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0 1081; AVX512-NEXT: retq 1082 %cond = icmp sgt <32 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, 
i8 -1, i8 -1, i8 -1, i8 -1> 1083 %r = select <32 x i1> %cond, <32 x i8> %b, <32 x i8> zeroinitializer 1084 ret <32 x i8> %r 1085} 1086 1087define <16 x i16> @not_signbit_mask_v16i16(<16 x i16> %a, <16 x i16> %b) { 1088; SSE-LABEL: not_signbit_mask_v16i16: 1089; SSE: # %bb.0: 1090; SSE-NEXT: psraw $15, %xmm0 1091; SSE-NEXT: pandn %xmm2, %xmm0 1092; SSE-NEXT: psraw $15, %xmm1 1093; SSE-NEXT: pandn %xmm3, %xmm1 1094; SSE-NEXT: retq 1095; 1096; AVX1-LABEL: not_signbit_mask_v16i16: 1097; AVX1: # %bb.0: 1098; AVX1-NEXT: vpsraw $15, %xmm0, %xmm2 1099; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1100; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0 1101; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 1102; AVX1-NEXT: vandnps %ymm1, %ymm0, %ymm0 1103; AVX1-NEXT: retq 1104; 1105; AVX2-LABEL: not_signbit_mask_v16i16: 1106; AVX2: # %bb.0: 1107; AVX2-NEXT: vpsraw $15, %ymm0, %ymm0 1108; AVX2-NEXT: vpandn %ymm1, %ymm0, %ymm0 1109; AVX2-NEXT: retq 1110; 1111; AVX512-LABEL: not_signbit_mask_v16i16: 1112; AVX512: # %bb.0: 1113; AVX512-NEXT: vpsraw $15, %ymm0, %ymm0 1114; AVX512-NEXT: vpandn %ymm1, %ymm0, %ymm0 1115; AVX512-NEXT: retq 1116 %cond = icmp sgt <16 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> 1117 %r = select <16 x i1> %cond, <16 x i16> %b, <16 x i16> zeroinitializer 1118 ret <16 x i16> %r 1119} 1120 1121define <8 x i32> @not_signbit_mask_v8i32(<8 x i32> %a, <8 x i32> %b) { 1122; SSE-LABEL: not_signbit_mask_v8i32: 1123; SSE: # %bb.0: 1124; SSE-NEXT: psrad $31, %xmm0 1125; SSE-NEXT: pandn %xmm2, %xmm0 1126; SSE-NEXT: psrad $31, %xmm1 1127; SSE-NEXT: pandn %xmm3, %xmm1 1128; SSE-NEXT: retq 1129; 1130; AVX1-LABEL: not_signbit_mask_v8i32: 1131; AVX1: # %bb.0: 1132; AVX1-NEXT: vpsrad $31, %xmm0, %xmm2 1133; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1134; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0 1135; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 1136; AVX1-NEXT: vandnps %ymm1, %ymm0, %ymm0 1137; 
AVX1-NEXT: retq 1138; 1139; AVX2-LABEL: not_signbit_mask_v8i32: 1140; AVX2: # %bb.0: 1141; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0 1142; AVX2-NEXT: vpandn %ymm1, %ymm0, %ymm0 1143; AVX2-NEXT: retq 1144; 1145; AVX512-LABEL: not_signbit_mask_v8i32: 1146; AVX512: # %bb.0: 1147; AVX512-NEXT: vpsrad $31, %ymm0, %ymm0 1148; AVX512-NEXT: vpandn %ymm1, %ymm0, %ymm0 1149; AVX512-NEXT: retq 1150 %cond = icmp sgt <8 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> 1151 %r = select <8 x i1> %cond, <8 x i32> %b, <8 x i32> zeroinitializer 1152 ret <8 x i32> %r 1153} 1154 1155; Swap cmp pred and select ops. This is logically equivalent to the above test. 1156 1157define <8 x i32> @not_signbit_mask_swap_v8i32(<8 x i32> %a, <8 x i32> %b) { 1158; SSE-LABEL: not_signbit_mask_swap_v8i32: 1159; SSE: # %bb.0: 1160; SSE-NEXT: psrad $31, %xmm0 1161; SSE-NEXT: pandn %xmm2, %xmm0 1162; SSE-NEXT: psrad $31, %xmm1 1163; SSE-NEXT: pandn %xmm3, %xmm1 1164; SSE-NEXT: retq 1165; 1166; AVX1-LABEL: not_signbit_mask_swap_v8i32: 1167; AVX1: # %bb.0: 1168; AVX1-NEXT: vpsrad $31, %xmm0, %xmm2 1169; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1170; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0 1171; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 1172; AVX1-NEXT: vandnps %ymm1, %ymm0, %ymm0 1173; AVX1-NEXT: retq 1174; 1175; AVX2-LABEL: not_signbit_mask_swap_v8i32: 1176; AVX2: # %bb.0: 1177; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0 1178; AVX2-NEXT: vpandn %ymm1, %ymm0, %ymm0 1179; AVX2-NEXT: retq 1180; 1181; AVX512-LABEL: not_signbit_mask_swap_v8i32: 1182; AVX512: # %bb.0: 1183; AVX512-NEXT: vpsrad $31, %ymm0, %ymm0 1184; AVX512-NEXT: vpandn %ymm1, %ymm0, %ymm0 1185; AVX512-NEXT: retq 1186 %cond = icmp slt <8 x i32> %a, zeroinitializer 1187 %r = select <8 x i1> %cond, <8 x i32> zeroinitializer, <8 x i32> %b 1188 ret <8 x i32> %r 1189} 1190 1191define <4 x i64> @not_signbit_mask_v4i64(<4 x i64> %a, <4 x i64> %b) { 1192; SSE2-LABEL: not_signbit_mask_v4i64: 1193; SSE2: # %bb.0: 1194; SSE2-NEXT: pshufd 
{{.*#+}} xmm0 = xmm0[1,1,3,3] 1195; SSE2-NEXT: pcmpeqd %xmm4, %xmm4 1196; SSE2-NEXT: pcmpgtd %xmm4, %xmm0 1197; SSE2-NEXT: pand %xmm2, %xmm0 1198; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1199; SSE2-NEXT: pcmpgtd %xmm4, %xmm1 1200; SSE2-NEXT: pand %xmm3, %xmm1 1201; SSE2-NEXT: retq 1202; 1203; SSE42-LABEL: not_signbit_mask_v4i64: 1204; SSE42: # %bb.0: 1205; SSE42-NEXT: pcmpeqd %xmm4, %xmm4 1206; SSE42-NEXT: pcmpgtq %xmm4, %xmm0 1207; SSE42-NEXT: pand %xmm2, %xmm0 1208; SSE42-NEXT: pcmpgtq %xmm4, %xmm1 1209; SSE42-NEXT: pand %xmm3, %xmm1 1210; SSE42-NEXT: retq 1211; 1212; AVX1-LABEL: not_signbit_mask_v4i64: 1213; AVX1: # %bb.0: 1214; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1215; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 1216; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 1217; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0 1218; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1219; AVX1-NEXT: vandnps %ymm1, %ymm0, %ymm0 1220; AVX1-NEXT: retq 1221; 1222; AVX2-LABEL: not_signbit_mask_v4i64: 1223; AVX2: # %bb.0: 1224; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 1225; AVX2-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm0 1226; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 1227; AVX2-NEXT: retq 1228; 1229; AVX512-LABEL: not_signbit_mask_v4i64: 1230; AVX512: # %bb.0: 1231; AVX512-NEXT: vpsraq $63, %ymm0, %ymm0 1232; AVX512-NEXT: vpandn %ymm1, %ymm0, %ymm0 1233; AVX512-NEXT: retq 1234 %cond = icmp sgt <4 x i64> %a, <i64 -1, i64 -1, i64 -1, i64 -1> 1235 %r = select <4 x i1> %cond, <4 x i64> %b, <4 x i64> zeroinitializer 1236 ret <4 x i64> %r 1237} 1238