1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1 5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2 6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX,AVX512,AVX512F 7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX,AVX512,AVX512DQBW 8 9; Lower common integer comparisons such as 'isPositive' efficiently: 10; https://llvm.org/bugs/show_bug.cgi?id=26701 11 12define <16 x i8> @test_pcmpgtb(<16 x i8> %x) { 13; SSE-LABEL: test_pcmpgtb: 14; SSE: # %bb.0: 15; SSE-NEXT: pcmpeqd %xmm1, %xmm1 16; SSE-NEXT: pcmpgtb %xmm1, %xmm0 17; SSE-NEXT: retq 18; 19; AVX-LABEL: test_pcmpgtb: 20; AVX: # %bb.0: 21; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 22; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 23; AVX-NEXT: retq 24 %sign = ashr <16 x i8> %x, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7> 25 %not = xor <16 x i8> %sign, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 26 ret <16 x i8> %not 27} 28 29define <8 x i16> @test_pcmpgtw(<8 x i16> %x) { 30; SSE-LABEL: test_pcmpgtw: 31; SSE: # %bb.0: 32; SSE-NEXT: pcmpeqd %xmm1, %xmm1 33; SSE-NEXT: pcmpgtw %xmm1, %xmm0 34; SSE-NEXT: retq 35; 36; AVX-LABEL: test_pcmpgtw: 37; AVX: # %bb.0: 38; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 39; AVX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 40; AVX-NEXT: retq 41 %sign = ashr <8 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> 42 %not = xor <8 x i16> %sign, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> 43 ret <8 x i16> %not 44} 45 46define <4 x i32> @test_pcmpgtd(<4 x i32> %x) { 47; SSE-LABEL: test_pcmpgtd: 48; SSE: # %bb.0: 49; SSE-NEXT: pcmpeqd %xmm1, %xmm1 50; SSE-NEXT: pcmpgtd %xmm1, %xmm0 51; SSE-NEXT: retq 52; 53; AVX-LABEL: test_pcmpgtd: 54; AVX: # %bb.0: 55; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 56; AVX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 57; AVX-NEXT: retq 58 %sign = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31> 59 %not = xor <4 x i32> %sign, <i32 -1, i32 -1, i32 -1, i32 -1> 60 ret <4 x i32> %not 61} 62 63define <2 x i64> @test_pcmpgtq(<2 x i64> %x) { 64; SSE2-LABEL: test_pcmpgtq: 65; SSE2: # %bb.0: 66; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 67; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 68; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 69; SSE2-NEXT: retq 70; 71; SSE42-LABEL: test_pcmpgtq: 72; SSE42: # %bb.0: 73; SSE42-NEXT: pcmpeqd %xmm1, %xmm1 74; SSE42-NEXT: pcmpgtq %xmm1, %xmm0 75; SSE42-NEXT: retq 76; 77; AVX-LABEL: test_pcmpgtq: 78; AVX: # %bb.0: 79; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 80; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 81; AVX-NEXT: retq 82 %sign = ashr <2 x i64> %x, <i64 63, i64 63> 83 %not = xor <2 x i64> %sign, <i64 -1, i64 -1> 84 ret <2 x i64> %not 85} 86 87define <1 x i128> @test_strange_type(<1 x i128> %x) { 88; CHECK-LABEL: test_strange_type: 89; CHECK: # %bb.0: 90; CHECK-NEXT: movq %rsi, %rax 91; CHECK-NEXT: sarq $63, %rax 92; CHECK-NEXT: notq %rax 93; CHECK-NEXT: movq %rax, %rdx 94; CHECK-NEXT: retq 95 %sign = ashr <1 x i128> %x, <i128 127> 96 %not = xor <1 x i128> %sign, <i128 -1> 97 ret <1 x i128> %not 98} 99 100define <32 x i8> @test_pcmpgtb_256(<32 x i8> %x) { 101; SSE-LABEL: test_pcmpgtb_256: 102; SSE: # %bb.0: 103; SSE-NEXT: pcmpeqd %xmm2, %xmm2 104; SSE-NEXT: pcmpgtb %xmm2, %xmm0 105; SSE-NEXT: pcmpgtb %xmm2, %xmm1 106; SSE-NEXT: retq 107; 108; AVX1-LABEL: test_pcmpgtb_256: 109; AVX1: # %bb.0: 110; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 111; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 112; AVX1-NEXT: vpcmpgtb %xmm1, %xmm2, %xmm1 113; AVX1-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0 114; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 115; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 116; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1 117; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0 118; AVX1-NEXT: retq 119; 120; AVX2-LABEL: test_pcmpgtb_256: 121; AVX2: # %bb.0: 122; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 123; AVX2-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 124; AVX2-NEXT: retq 125; 126; AVX512-LABEL: test_pcmpgtb_256: 127; AVX512: # %bb.0: 128; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 129; AVX512-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 130; AVX512-NEXT: retq 131 %sign = ashr <32 x i8> %x, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7> 132 %not = xor <32 x i8> %sign, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 133 ret <32 x i8> %not 134} 135 136define <16 x i16> @test_pcmpgtw_256(<16 x i16> %x) { 137; SSE-LABEL: test_pcmpgtw_256: 138; SSE: # %bb.0: 139; SSE-NEXT: pcmpeqd %xmm2, %xmm2 140; SSE-NEXT: pcmpgtw %xmm2, %xmm0 141; SSE-NEXT: pcmpgtw %xmm2, %xmm1 142; SSE-NEXT: retq 143; 144; AVX1-LABEL: test_pcmpgtw_256: 145; AVX1: # %bb.0: 146; AVX1-NEXT: vpsraw $15, %xmm0, %xmm1 147; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 148; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0 149; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 150; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 151; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1 152; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0 153; AVX1-NEXT: retq 154; 155; AVX2-LABEL: test_pcmpgtw_256: 156; AVX2: # %bb.0: 157; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 158; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 159; AVX2-NEXT: retq 160; 161; AVX512-LABEL: test_pcmpgtw_256: 162; AVX512: # %bb.0: 163; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 164; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 165; AVX512-NEXT: retq 166 %sign = ashr <16 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> 167 %not = xor <16 x i16> %sign, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> 168 ret <16 x i16> %not 169} 170 171define <8 x i32> @test_pcmpgtd_256(<8 x i32> %x) { 172; SSE-LABEL: test_pcmpgtd_256: 173; SSE: # %bb.0: 174; SSE-NEXT: pcmpeqd %xmm2, %xmm2 175; SSE-NEXT: pcmpgtd %xmm2, %xmm0 176; SSE-NEXT: pcmpgtd %xmm2, %xmm1 177; SSE-NEXT: retq 178; 179; AVX1-LABEL: test_pcmpgtd_256: 180; AVX1: # %bb.0: 181; AVX1-NEXT: vpsrad $31, %xmm0, %xmm1 182; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 183; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0 184; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 185; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 186; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1 187; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0 188; AVX1-NEXT: retq 189; 190; AVX2-LABEL: test_pcmpgtd_256: 191; AVX2: # %bb.0: 192; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 193; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 194; AVX2-NEXT: retq 195; 196; AVX512-LABEL: test_pcmpgtd_256: 197; AVX512: # %bb.0: 198; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 199; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 200; AVX512-NEXT: retq 201 %sign = ashr <8 x i32> %x, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> 202 %not = xor <8 x i32> %sign, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> 203 ret <8 x i32> %not 204} 205 206define <4 x i64> @test_pcmpgtq_256(<4 x i64> %x) { 207; SSE2-LABEL: test_pcmpgtq_256: 208; SSE2: # %bb.0: 209; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 210; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 211; SSE2-NEXT: pcmpgtd %xmm2, %xmm0 212; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 213; SSE2-NEXT: pcmpgtd %xmm2, %xmm1 214; SSE2-NEXT: retq 215; 216; SSE42-LABEL: test_pcmpgtq_256: 217; SSE42: # %bb.0: 218; SSE42-NEXT: pcmpeqd %xmm2, %xmm2 219; SSE42-NEXT: pcmpgtq %xmm2, %xmm0 220; SSE42-NEXT: pcmpgtq %xmm2, %xmm1 221; SSE42-NEXT: retq 222; 223; AVX1-LABEL: test_pcmpgtq_256: 224; AVX1: # %bb.0: 225; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 226; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 227; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 228; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm0 229; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 230; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 231; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1 232; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0 233; AVX1-NEXT: retq 234; 235; AVX2-LABEL: test_pcmpgtq_256: 236; AVX2: # %bb.0: 237; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 238; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 239; AVX2-NEXT: retq 240; 241; AVX512-LABEL: test_pcmpgtq_256: 242; AVX512: # %bb.0: 243; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 244; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 245; AVX512-NEXT: retq 246 %sign = ashr <4 x i64> %x, <i64 63, i64 63, i64 63, i64 63> 247 %not = xor <4 x i64> %sign, <i64 -1, i64 -1, i64 -1, i64 -1> 248 ret <4 x i64> %not 249} 250 251define <16 x i8> @cmpeq_zext_v16i8(<16 x i8> %a, <16 x i8> %b) { 252; SSE-LABEL: cmpeq_zext_v16i8: 253; SSE: # %bb.0: 254; SSE-NEXT: pcmpeqb %xmm1, %xmm0 255; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 256; SSE-NEXT: retq 257; 258; AVX1-LABEL: cmpeq_zext_v16i8: 259; AVX1: # %bb.0: 260; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 261; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 262; AVX1-NEXT: retq 263; 264; AVX2-LABEL: cmpeq_zext_v16i8: 265; AVX2: # %bb.0: 266; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 267; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 268; AVX2-NEXT: retq 269; 270; AVX512-LABEL: cmpeq_zext_v16i8: 271; AVX512: # %bb.0: 272; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 273; AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 274; AVX512-NEXT: retq 275 %cmp = icmp eq <16 x i8> %a, %b 276 %zext = zext <16 x i1> %cmp to <16 x i8> 277 ret <16 x i8> %zext 278} 279 280define <16 x i16> @cmpeq_zext_v16i16(<16 x i16> %a, <16 x i16> %b) { 281; SSE-LABEL: cmpeq_zext_v16i16: 282; SSE: # %bb.0: 283; SSE-NEXT: pcmpeqw %xmm2, %xmm0 284; SSE-NEXT: psrlw $15, %xmm0 285; SSE-NEXT: pcmpeqw %xmm3, %xmm1 286; SSE-NEXT: psrlw $15, %xmm1 287; SSE-NEXT: retq 288; 289; AVX1-LABEL: cmpeq_zext_v16i16: 290; AVX1: # %bb.0: 291; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 292; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 293; AVX1-NEXT: vpcmpeqw %xmm2, %xmm3, %xmm2 294; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 295; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 296; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 297; AVX1-NEXT: retq 298; 299; AVX2-LABEL: cmpeq_zext_v16i16: 300; AVX2: # %bb.0: 301; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 302; AVX2-NEXT: vpsrlw $15, %ymm0, %ymm0 303; AVX2-NEXT: retq 304; 305; AVX512-LABEL: cmpeq_zext_v16i16: 306; AVX512: # %bb.0: 307; AVX512-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 308; AVX512-NEXT: vpsrlw $15, %ymm0, %ymm0 309; AVX512-NEXT: retq 310 %cmp = icmp eq <16 x i16> %a, %b 311 %zext = zext <16 x i1> %cmp to <16 x i16> 312 ret <16 x i16> %zext 313} 314 315define <4 x i32> @cmpeq_zext_v4i32(<4 x i32> %a, <4 x i32> %b) { 316; SSE-LABEL: cmpeq_zext_v4i32: 317; SSE: # %bb.0: 318; SSE-NEXT: pcmpeqd %xmm1, %xmm0 319; SSE-NEXT: psrld $31, %xmm0 320; SSE-NEXT: retq 321; 322; AVX-LABEL: cmpeq_zext_v4i32: 323; AVX: # %bb.0: 324; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 325; AVX-NEXT: vpsrld $31, %xmm0, %xmm0 326; AVX-NEXT: retq 327 %cmp = icmp eq <4 x i32> %a, %b 328 %zext = zext <4 x i1> %cmp to <4 x i32> 329 ret <4 x i32> %zext 330} 331 332define <4 x i64> @cmpeq_zext_v4i64(<4 x i64> %a, <4 x i64> %b) { 333; SSE2-LABEL: cmpeq_zext_v4i64: 334; SSE2: # %bb.0: 335; SSE2-NEXT: pcmpeqd %xmm2, %xmm0 336; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,0,3,2] 337; SSE2-NEXT: pand %xmm2, %xmm0 338; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,1] 339; SSE2-NEXT: pand %xmm2, %xmm0 340; SSE2-NEXT: pcmpeqd %xmm3, %xmm1 341; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,0,3,2] 342; SSE2-NEXT: pand %xmm3, %xmm1 343; SSE2-NEXT: pand %xmm2, %xmm1 344; SSE2-NEXT: retq 345; 346; SSE42-LABEL: cmpeq_zext_v4i64: 347; SSE42: # %bb.0: 348; SSE42-NEXT: pcmpeqq %xmm2, %xmm0 349; SSE42-NEXT: psrlq $63, %xmm0 350; SSE42-NEXT: pcmpeqq %xmm3, %xmm1 351; SSE42-NEXT: psrlq $63, %xmm1 352; SSE42-NEXT: retq 353; 354; AVX1-LABEL: cmpeq_zext_v4i64: 355; AVX1: # %bb.0: 356; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 357; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 358; AVX1-NEXT: vpcmpeqq %xmm2, %xmm3, %xmm2 359; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 360; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 361; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 362; AVX1-NEXT: retq 363; 364; AVX2-LABEL: cmpeq_zext_v4i64: 365; AVX2: # %bb.0: 366; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 367; AVX2-NEXT: vpsrlq $63, %ymm0, %ymm0 368; AVX2-NEXT: retq 369; 370; AVX512-LABEL: cmpeq_zext_v4i64: 371; AVX512: # %bb.0: 372; AVX512-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 373; AVX512-NEXT: vpsrlq $63, %ymm0, %ymm0 374; AVX512-NEXT: retq 375 %cmp = icmp eq <4 x i64> %a, %b 376 %zext = zext <4 x i1> %cmp to <4 x i64> 377 ret <4 x i64> %zext 378} 379 380define <32 x i8> @cmpgt_zext_v32i8(<32 x i8> %a, <32 x i8> %b) { 381; SSE-LABEL: cmpgt_zext_v32i8: 382; SSE: # %bb.0: 383; SSE-NEXT: pcmpgtb %xmm2, %xmm0 384; SSE-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] 385; SSE-NEXT: pand %xmm2, %xmm0 386; SSE-NEXT: pcmpgtb %xmm3, %xmm1 387; SSE-NEXT: pand %xmm2, %xmm1 388; SSE-NEXT: retq 389; 390; AVX1-LABEL: cmpgt_zext_v32i8: 391; AVX1: # %bb.0: 392; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 393; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 394; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2 395; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 396; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 397; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 398; AVX1-NEXT: retq 399; 400; AVX2-LABEL: cmpgt_zext_v32i8: 401; AVX2: # %bb.0: 402; AVX2-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 403; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 404; AVX2-NEXT: retq 405; 406; AVX512-LABEL: cmpgt_zext_v32i8: 407; AVX512: # %bb.0: 408; AVX512-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 409; AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0 410; AVX512-NEXT: retq 411 %cmp = icmp sgt <32 x i8> %a, %b 412 %zext = zext <32 x i1> %cmp to <32 x i8> 413 ret <32 x i8> %zext 414} 415 416define <8 x i16> @cmpgt_zext_v8i16(<8 x i16> %a, <8 x i16> %b) { 417; SSE-LABEL: cmpgt_zext_v8i16: 418; SSE: # %bb.0: 419; SSE-NEXT: pcmpgtw %xmm1, %xmm0 420; SSE-NEXT: psrlw $15, %xmm0 421; SSE-NEXT: retq 422; 423; AVX-LABEL: cmpgt_zext_v8i16: 424; AVX: # %bb.0: 425; AVX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 426; AVX-NEXT: vpsrlw $15, %xmm0, %xmm0 427; AVX-NEXT: retq 428 %cmp = icmp sgt <8 x i16> %a, %b 429 %zext = zext <8 x i1> %cmp to <8 x i16> 430 ret <8 x i16> %zext 431} 432 433define <8 x i32> @cmpgt_zext_v8i32(<8 x i32> %a, <8 x i32> %b) { 434; SSE-LABEL: cmpgt_zext_v8i32: 435; SSE: # %bb.0: 436; SSE-NEXT: pcmpgtd %xmm2, %xmm0 437; SSE-NEXT: psrld $31, %xmm0 438; SSE-NEXT: pcmpgtd %xmm3, %xmm1 439; SSE-NEXT: psrld $31, %xmm1 440; SSE-NEXT: retq 441; 442; AVX1-LABEL: cmpgt_zext_v8i32: 443; AVX1: # %bb.0: 444; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 445; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 446; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2 447; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 448; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 449; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 450; AVX1-NEXT: retq 451; 452; AVX2-LABEL: cmpgt_zext_v8i32: 453; AVX2: # %bb.0: 454; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 455; AVX2-NEXT: vpsrld $31, %ymm0, %ymm0 456; AVX2-NEXT: retq 457; 458; AVX512-LABEL: cmpgt_zext_v8i32: 459; AVX512: # %bb.0: 460; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 461; AVX512-NEXT: vpsrld $31, %ymm0, %ymm0 462; AVX512-NEXT: retq 463 %cmp = icmp sgt <8 x i32> %a, %b 464 %zext = zext <8 x i1> %cmp to <8 x i32> 465 ret <8 x i32> %zext 466} 467 468define <2 x i64> @cmpgt_zext_v2i64(<2 x i64> %a, <2 x i64> %b) { 469; SSE2-LABEL: cmpgt_zext_v2i64: 470; SSE2: # %bb.0: 471; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] 472; SSE2-NEXT: pxor %xmm2, %xmm1 473; SSE2-NEXT: pxor %xmm2, %xmm0 474; SSE2-NEXT: movdqa %xmm0, %xmm2 475; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 476; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 477; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 478; SSE2-NEXT: pand %xmm2, %xmm1 479; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 480; SSE2-NEXT: por %xmm1, %xmm0 481; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 482; SSE2-NEXT: retq 483; 484; SSE42-LABEL: cmpgt_zext_v2i64: 485; SSE42: # %bb.0: 486; SSE42-NEXT: pcmpgtq %xmm1, %xmm0 487; SSE42-NEXT: psrlq $63, %xmm0 488; SSE42-NEXT: retq 489; 490; AVX-LABEL: cmpgt_zext_v2i64: 491; AVX: # %bb.0: 492; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 493; AVX-NEXT: vpsrlq $63, %xmm0, %xmm0 494; AVX-NEXT: retq 495 %cmp = icmp sgt <2 x i64> %a, %b 496 %zext = zext <2 x i1> %cmp to <2 x i64> 497 ret <2 x i64> %zext 498} 499 500; Test that we optimize a zext of a vector setcc ne zero where all bits but the 501; lsb are known to be zero. 502define <8 x i32> @cmpne_knownzeros_zext_v8i16_v8i32(<8 x i16> %x) { 503; SSE2-LABEL: cmpne_knownzeros_zext_v8i16_v8i32: 504; SSE2: # %bb.0: 505; SSE2-NEXT: movdqa %xmm0, %xmm1 506; SSE2-NEXT: psrlw $15, %xmm1 507; SSE2-NEXT: pxor %xmm2, %xmm2 508; SSE2-NEXT: movdqa %xmm1, %xmm0 509; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 510; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 511; SSE2-NEXT: retq 512; 513; SSE42-LABEL: cmpne_knownzeros_zext_v8i16_v8i32: 514; SSE42: # %bb.0: 515; SSE42-NEXT: movdqa %xmm0, %xmm1 516; SSE42-NEXT: psrlw $15, %xmm1 517; SSE42-NEXT: pxor %xmm2, %xmm2 518; SSE42-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero 519; SSE42-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 520; SSE42-NEXT: retq 521; 522; AVX1-LABEL: cmpne_knownzeros_zext_v8i16_v8i32: 523; AVX1: # %bb.0: 524; AVX1-NEXT: vpsrlw $15, %xmm0, %xmm0 525; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 526; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 527; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 528; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 529; AVX1-NEXT: retq 530; 531; AVX2-LABEL: cmpne_knownzeros_zext_v8i16_v8i32: 532; AVX2: # %bb.0: 533; AVX2-NEXT: vpsrlw $15, %xmm0, %xmm0 534; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 535; AVX2-NEXT: retq 536; 537; AVX512-LABEL: cmpne_knownzeros_zext_v8i16_v8i32: 538; AVX512: # %bb.0: 539; AVX512-NEXT: vpsrlw $15, %xmm0, %xmm0 540; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 541; AVX512-NEXT: retq 542 %a = lshr <8 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> 543 %b = icmp ne <8 x i16> %a, zeroinitializer 544 %c = zext <8 x i1> %b to <8 x i32> 545 ret <8 x i32> %c 546} 547 548define <8 x i32> @cmpne_knownzeros_zext_v8i32_v8i32(<8 x i32> %x) { 549; SSE-LABEL: cmpne_knownzeros_zext_v8i32_v8i32: 550; SSE: # %bb.0: 551; SSE-NEXT: psrld $31, %xmm0 552; SSE-NEXT: psrld $31, %xmm1 553; SSE-NEXT: retq 554; 555; AVX1-LABEL: cmpne_knownzeros_zext_v8i32_v8i32: 556; AVX1: # %bb.0: 557; AVX1-NEXT: vpsrld $31, %xmm0, %xmm1 558; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 559; AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 560; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 561; AVX1-NEXT: retq 562; 563; AVX2-LABEL: cmpne_knownzeros_zext_v8i32_v8i32: 564; AVX2: # %bb.0: 565; AVX2-NEXT: vpsrld $31, %ymm0, %ymm0 566; AVX2-NEXT: retq 567; 568; AVX512-LABEL: cmpne_knownzeros_zext_v8i32_v8i32: 569; AVX512: # %bb.0: 570; AVX512-NEXT: vpsrld $31, %ymm0, %ymm0 571; AVX512-NEXT: retq 572 %a = lshr <8 x i32> %x, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> 573 %b = icmp ne <8 x i32> %a, zeroinitializer 574 %c = zext <8 x i1> %b to <8 x i32> 575 ret <8 x i32> %c 576} 577 578define <8 x i16> @cmpne_knownzeros_zext_v8i32_v8i16(<8 x i32> %x) { 579; SSE2-LABEL: cmpne_knownzeros_zext_v8i32_v8i16: 580; SSE2: # %bb.0: 581; SSE2-NEXT: psrld $31, %xmm1 582; SSE2-NEXT: psrld $31, %xmm0 583; SSE2-NEXT: packuswb %xmm1, %xmm0 584; SSE2-NEXT: retq 585; 586; SSE42-LABEL: cmpne_knownzeros_zext_v8i32_v8i16: 587; SSE42: # %bb.0: 588; SSE42-NEXT: psrld $31, %xmm1 589; SSE42-NEXT: psrld $31, %xmm0 590; SSE42-NEXT: packusdw %xmm1, %xmm0 591; SSE42-NEXT: retq 592; 593; AVX1-LABEL: cmpne_knownzeros_zext_v8i32_v8i16: 594; AVX1: # %bb.0: 595; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 596; AVX1-NEXT: vpsrld $31, %xmm1, %xmm1 597; AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 598; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 599; AVX1-NEXT: vzeroupper 600; AVX1-NEXT: retq 601; 602; AVX2-LABEL: cmpne_knownzeros_zext_v8i32_v8i16: 603; AVX2: # %bb.0: 604; AVX2-NEXT: vpsrld $31, %ymm0, %ymm0 605; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 606; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 607; AVX2-NEXT: vzeroupper 608; AVX2-NEXT: retq 609; 610; AVX512-LABEL: cmpne_knownzeros_zext_v8i32_v8i16: 611; AVX512: # %bb.0: 612; AVX512-NEXT: vpsrld $31, %ymm0, %ymm0 613; AVX512-NEXT: vpmovdw %ymm0, %xmm0 614; AVX512-NEXT: vzeroupper 615; AVX512-NEXT: retq 616 %a = lshr <8 x i32> %x, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> 617 %b = icmp ne <8 x i32> %a, zeroinitializer 618 %c = zext <8 x i1> %b to <8 x i16> 619 ret <8 x i16> %c 620} 621 622; PR26697 623define <4 x i32> @cmpeq_one_mask_bit(<4 x i32> %mask) { 624; SSE-LABEL: cmpeq_one_mask_bit: 625; SSE: # %bb.0: 626; SSE-NEXT: psrad $31, %xmm0 627; SSE-NEXT: retq 628; 629; AVX-LABEL: cmpeq_one_mask_bit: 630; AVX: # %bb.0: 631; AVX-NEXT: vpsrad $31, %xmm0, %xmm0 632; AVX-NEXT: retq 633 %mask_signbit = and <4 x i32> %mask, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648> 634 %mask_bool = icmp ne <4 x i32> %mask_signbit, zeroinitializer 635 %mask_bool_ext = sext <4 x i1> %mask_bool to <4 x i32> 636 ret <4 x i32> %mask_bool_ext 637} 638 639define <2 x i64> @not_signbit_mask_v2i64(<2 x i64> %x, <2 x i64> %y) { 640; SSE2-LABEL: not_signbit_mask_v2i64: 641; SSE2: # %bb.0: 642; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 643; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 644; SSE2-NEXT: pcmpgtd %xmm2, %xmm0 645; SSE2-NEXT: pand %xmm1, %xmm0 646; SSE2-NEXT: retq 647; 648; SSE42-LABEL: not_signbit_mask_v2i64: 649; SSE42: # %bb.0: 650; SSE42-NEXT: pcmpeqd %xmm2, %xmm2 651; SSE42-NEXT: pcmpgtq %xmm2, %xmm0 652; SSE42-NEXT: pand %xmm1, %xmm0 653; SSE42-NEXT: retq 654; 655; AVX1-LABEL: not_signbit_mask_v2i64: 656; AVX1: # %bb.0: 657; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 658; AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm0 659; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0 660; AVX1-NEXT: retq 661; 662; AVX2-LABEL: not_signbit_mask_v2i64: 663; AVX2: # %bb.0: 664; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 665; AVX2-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm0 666; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0 667; AVX2-NEXT: retq 668; 669; AVX512-LABEL: not_signbit_mask_v2i64: 670; AVX512: # %bb.0: 671; AVX512-NEXT: vpsraq $63, %xmm0, %xmm0 672; AVX512-NEXT: vpandn %xmm1, %xmm0, %xmm0 673; AVX512-NEXT: retq 674 %sh = ashr <2 x i64> %x, <i64 63, i64 63> 675 %not = xor <2 x i64> %sh, <i64 -1, i64 -1> 676 %and = and <2 x i64> %y, %not 677 ret <2 x i64> %and 678} 679 680define <4 x i32> @not_signbit_mask_v4i32(<4 x i32> %x, <4 x i32> %y) { 681; SSE-LABEL: not_signbit_mask_v4i32: 682; SSE: # %bb.0: 683; SSE-NEXT: psrad $31, %xmm0 684; SSE-NEXT: pandn %xmm1, %xmm0 685; SSE-NEXT: retq 686; 687; AVX-LABEL: not_signbit_mask_v4i32: 688; AVX: # %bb.0: 689; AVX-NEXT: vpsrad $31, %xmm0, %xmm0 690; AVX-NEXT: vpandn %xmm1, %xmm0, %xmm0 691; AVX-NEXT: retq 692 %sh = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31> 693 %not = xor <4 x i32> %sh, <i32 -1, i32 -1, i32 -1, i32 -1> 694 %and = and <4 x i32> %not, %y 695 ret <4 x i32> %and 696} 697 698define <8 x i16> @not_signbit_mask_v8i16(<8 x i16> %x, <8 x i16> %y) { 699; SSE-LABEL: not_signbit_mask_v8i16: 700; SSE: # %bb.0: 701; SSE-NEXT: psraw $15, %xmm0 702; SSE-NEXT: pandn %xmm1, %xmm0 703; SSE-NEXT: retq 704; 705; AVX-LABEL: not_signbit_mask_v8i16: 706; AVX: # %bb.0: 707; AVX-NEXT: vpsraw $15, %xmm0, %xmm0 708; AVX-NEXT: vpandn %xmm1, %xmm0, %xmm0 709; AVX-NEXT: retq 710 %sh = ashr <8 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> 711 %not = xor <8 x i16> %sh, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> 712 %and = and <8 x i16> %y, %not 713 ret <8 x i16> %and 714} 715 716define <16 x i8> @not_signbit_mask_v16i8(<16 x i8> %x, <16 x i8> %y) { 717; SSE-LABEL: not_signbit_mask_v16i8: 718; SSE: # %bb.0: 719; SSE-NEXT: pcmpeqd %xmm2, %xmm2 720; SSE-NEXT: pcmpgtb %xmm2, %xmm0 721; SSE-NEXT: pand %xmm1, %xmm0 722; SSE-NEXT: retq 723; 724; AVX-LABEL: not_signbit_mask_v16i8: 725; AVX: # %bb.0: 726; AVX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 727; AVX-NEXT: vpcmpgtb %xmm2, %xmm0, %xmm0 728; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 729; AVX-NEXT: retq 730 %sh = ashr <16 x i8> %x, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7> 731 %not = xor <16 x i8> %sh, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 732 %and = and <16 x i8> %not, %y 733 ret <16 x i8> %and 734} 735 736define <4 x i64> @not_signbit_mask_v4i64(<4 x i64> %x, <4 x i64> %y) { 737; SSE2-LABEL: not_signbit_mask_v4i64: 738; SSE2: # %bb.0: 739; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 740; SSE2-NEXT: pcmpeqd %xmm4, %xmm4 741; SSE2-NEXT: pcmpgtd %xmm4, %xmm1 742; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 743; SSE2-NEXT: pcmpgtd %xmm4, %xmm0 744; SSE2-NEXT: pand %xmm2, %xmm0 745; SSE2-NEXT: pand %xmm3, %xmm1 746; SSE2-NEXT: retq 747; 748; SSE42-LABEL: not_signbit_mask_v4i64: 749; SSE42: # %bb.0: 750; SSE42-NEXT: pcmpeqd %xmm4, %xmm4 751; SSE42-NEXT: pcmpgtq %xmm4, %xmm1 752; SSE42-NEXT: pcmpgtq %xmm4, %xmm0 753; SSE42-NEXT: pand %xmm2, %xmm0 754; SSE42-NEXT: pand %xmm3, %xmm1 755; SSE42-NEXT: retq 756; 757; AVX1-LABEL: not_signbit_mask_v4i64: 758; AVX1: # %bb.0: 759; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 760; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 761; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 762; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0 763; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 764; AVX1-NEXT: vandnps %ymm1, %ymm0, %ymm0 765; AVX1-NEXT: retq 766; 767; AVX2-LABEL: not_signbit_mask_v4i64: 768; AVX2: # %bb.0: 769; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 770; AVX2-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm0 771; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0 772; AVX2-NEXT: retq 773; 774; AVX512-LABEL: not_signbit_mask_v4i64: 775; AVX512: # %bb.0: 776; AVX512-NEXT: vpsraq $63, %ymm0, %ymm0 777; AVX512-NEXT: vpandn %ymm1, %ymm0, %ymm0 778; AVX512-NEXT: retq 779 %sh = ashr <4 x i64> %x, <i64 63, i64 63, i64 63, i64 63> 780 %not = xor <4 x i64> %sh, <i64 -1, i64 -1, i64 -1, i64 -1> 781 %and = and <4 x i64> %y, %not 782 ret <4 x i64> %and 783} 784 785define <8 x i32> @not_signbit_mask_v8i32(<8 x i32> %x, <8 x i32> %y) { 786; SSE-LABEL: not_signbit_mask_v8i32: 787; SSE: # %bb.0: 788; SSE-NEXT: psrad $31, %xmm0 789; SSE-NEXT: pandn %xmm2, %xmm0 790; SSE-NEXT: psrad $31, %xmm1 791; SSE-NEXT: pandn %xmm3, %xmm1 792; SSE-NEXT: retq 793; 794; AVX1-LABEL: not_signbit_mask_v8i32: 795; AVX1: # %bb.0: 796; AVX1-NEXT: vpsrad $31, %xmm0, %xmm2 797; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 798; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0 799; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 800; AVX1-NEXT: vandnps %ymm1, %ymm0, %ymm0 801; AVX1-NEXT: retq 802; 803; AVX2-LABEL: not_signbit_mask_v8i32: 804; AVX2: # %bb.0: 805; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0 806; AVX2-NEXT: vpandn %ymm1, %ymm0, %ymm0 807; AVX2-NEXT: retq 808; 809; AVX512-LABEL: not_signbit_mask_v8i32: 810; AVX512: # %bb.0: 811; AVX512-NEXT: vpsrad $31, %ymm0, %ymm0 812; AVX512-NEXT: vpandn %ymm1, %ymm0, %ymm0 813; AVX512-NEXT: retq 814 %sh = ashr <8 x i32> %x, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> 815 %not = xor <8 x i32> %sh, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> 816 %and = and <8 x i32> %not, %y 817 ret <8 x i32> %and 818} 819 820define <16 x i16> @not_signbit_mask_v16i16(<16 x i16> %x, <16 x i16> %y) { 821; SSE-LABEL: not_signbit_mask_v16i16: 822; SSE: # %bb.0: 823; SSE-NEXT: psraw $15, %xmm0 824; SSE-NEXT: pandn %xmm2, %xmm0 825; SSE-NEXT: psraw $15, %xmm1 826; SSE-NEXT: pandn %xmm3, %xmm1 827; SSE-NEXT: retq 828; 829; AVX1-LABEL: not_signbit_mask_v16i16: 830; AVX1: # %bb.0: 831; AVX1-NEXT: vpsraw $15, %xmm0, %xmm2 832; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 833; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0 834; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 835; AVX1-NEXT: vandnps %ymm1, %ymm0, %ymm0 836; AVX1-NEXT: retq 837; 838; AVX2-LABEL: not_signbit_mask_v16i16: 839; AVX2: # %bb.0: 840; AVX2-NEXT: vpsraw $15, %ymm0, %ymm0 841; AVX2-NEXT: vpandn %ymm1, %ymm0, %ymm0 842; AVX2-NEXT: retq 843; 844; AVX512-LABEL: not_signbit_mask_v16i16: 845; AVX512: # %bb.0: 846; AVX512-NEXT: vpsraw $15, %ymm0, %ymm0 847; AVX512-NEXT: vpandn %ymm1, %ymm0, %ymm0 848; AVX512-NEXT: retq 849 %sh = ashr <16 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> 850 %not = xor <16 x i16> %sh, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> 851 %and = and <16 x i16> %y, %not 852 ret <16 x i16> %and 853} 854 855define <32 x i8> @not_signbit_mask_v32i8(<32 x i8> %x, <32 x i8> %y) { 856; SSE-LABEL: not_signbit_mask_v32i8: 857; SSE: # %bb.0: 858; SSE-NEXT: pcmpeqd %xmm4, %xmm4 859; SSE-NEXT: pcmpgtb %xmm4, %xmm1 860; SSE-NEXT: pcmpgtb %xmm4, %xmm0 861; SSE-NEXT: pand %xmm2, %xmm0 862; SSE-NEXT: pand %xmm3, %xmm1 863; SSE-NEXT: retq 864; 865; AVX1-LABEL: not_signbit_mask_v32i8: 866; AVX1: # %bb.0: 867; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 868; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 869; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2 870; AVX1-NEXT: vpcmpgtb %xmm0, %xmm3, %xmm0 871; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 872; AVX1-NEXT: vandnps %ymm1, %ymm0, %ymm0 873; AVX1-NEXT: retq 874; 875; AVX2-LABEL: not_signbit_mask_v32i8: 876; AVX2: # %bb.0: 877; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 878; AVX2-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0 879; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 880; AVX2-NEXT: retq 881; 882; AVX512-LABEL: not_signbit_mask_v32i8: 883; AVX512: # %bb.0: 884; AVX512-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 885; AVX512-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0 886; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0 887; AVX512-NEXT: retq 888 %sh = ashr <32 x i8> %x, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7> 889 %not = xor <32 x i8> %sh, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 890 %and = and <32 x i8> %not, %y 891 ret <32 x i8> %and 892} 893 894define <2 x i64> @ispositive_mask_v2i64(<2 x i64> %x, <2 x i64> %y) { 895; SSE2-LABEL: ispositive_mask_v2i64: 896; SSE2: # %bb.0: 897; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 898; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 899; SSE2-NEXT: pcmpgtd %xmm2, %xmm0 900; SSE2-NEXT: pand %xmm1, %xmm0 901; SSE2-NEXT: retq 902; 903; SSE42-LABEL: ispositive_mask_v2i64: 904; SSE42: # %bb.0: 905; SSE42-NEXT: pcmpeqd %xmm2, %xmm2 906; SSE42-NEXT: pcmpgtq %xmm2, %xmm0 907; SSE42-NEXT: pand %xmm1, %xmm0 908; SSE42-NEXT: retq 909; 910; AVX1-LABEL: ispositive_mask_v2i64: 911; AVX1: # %bb.0: 912; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 913; AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm0 914; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 915; AVX1-NEXT: retq 916; 917; AVX2-LABEL: ispositive_mask_v2i64: 918; AVX2: # %bb.0: 919; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 920; AVX2-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm0 921; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 922; AVX2-NEXT: retq 923; 924; AVX512-LABEL: ispositive_mask_v2i64: 925; AVX512: # %bb.0: 926; AVX512-NEXT: vpsraq $63, %xmm0, %xmm0 927; AVX512-NEXT: vpandn %xmm1, %xmm0, %xmm0 928; AVX512-NEXT: retq 929 %cmp = icmp sgt <2 x i64> %x, <i64 -1, i64 -1> 930 %mask = sext <2 x i1> %cmp to <2 x i64> 931 %and = and <2 x i64> %mask, %y 932 ret <2 x i64> %and 933} 934 935define <4 x i32> @is_positive_mask_v4i32(<4 x i32> %x, <4 x i32> %y) { 936; SSE-LABEL: is_positive_mask_v4i32: 937; SSE: # %bb.0: 938; SSE-NEXT: psrad $31, %xmm0 939; SSE-NEXT: pandn %xmm1, %xmm0 940; SSE-NEXT: retq 941; 942; AVX-LABEL: is_positive_mask_v4i32: 943; AVX: # %bb.0: 944; AVX-NEXT: vpsrad $31, %xmm0, %xmm0 945; AVX-NEXT: vpandn %xmm1, %xmm0, %xmm0 946; AVX-NEXT: retq 947 %cmp = icmp sgt <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> 948 %mask = sext <4 x i1> %cmp to <4 x i32> 949 %and = and <4 x i32> %y, %mask 950 ret <4 x i32> %and 951} 952 953define <8 x i16> @is_positive_mask_v8i16(<8 x i16> %x, <8 x i16> %y) { 954; SSE-LABEL: is_positive_mask_v8i16: 955; SSE: # %bb.0: 956; SSE-NEXT: psraw $15, %xmm0 957; SSE-NEXT: pandn %xmm1, %xmm0 958; SSE-NEXT: retq 959; 960; AVX-LABEL: is_positive_mask_v8i16: 961; AVX: # %bb.0: 962; AVX-NEXT: vpsraw $15, %xmm0, %xmm0 963; AVX-NEXT: vpandn %xmm1, %xmm0, %xmm0 964; AVX-NEXT: retq 965 %cmp = icmp sgt <8 x i16> %x, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> 966 %mask = sext <8 x i1> %cmp to <8 x i16> 967 %and = and <8 x i16> %mask, %y 968 ret <8 x i16> %and 969} 970 971define <16 x i8> @is_positive_mask_v16i8(<16 x i8> %x, <16 x i8> %y) { 972; SSE-LABEL: is_positive_mask_v16i8: 973; SSE: # %bb.0: 974; SSE-NEXT: pcmpeqd %xmm2, %xmm2 975; SSE-NEXT: pcmpgtb %xmm2, %xmm0 976; SSE-NEXT: pand %xmm1, %xmm0 977; SSE-NEXT: retq 978; 979; AVX-LABEL: is_positive_mask_v16i8: 980; AVX: # %bb.0: 981; AVX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 982; AVX-NEXT: vpcmpgtb %xmm2, %xmm0, %xmm0 983; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0 984; AVX-NEXT: retq 985 %cmp = icmp sgt <16 x i8> %x, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 986 %mask = sext <16 x i1> %cmp to <16 x i8> 987 %and = and <16 x i8> %y, %mask 988 ret <16 x i8> %and 989} 990 991define <4 x i64> @is_positive_mask_v4i64(<4 x i64> %x, <4 x i64> %y) { 992; SSE2-LABEL: is_positive_mask_v4i64: 993; SSE2: # %bb.0: 994; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 995; SSE2-NEXT: pcmpeqd %xmm4, %xmm4 996; SSE2-NEXT: pcmpgtd %xmm4, %xmm1 997; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 998; SSE2-NEXT: pcmpgtd %xmm4, %xmm0 999; SSE2-NEXT: pand %xmm2, %xmm0 1000; SSE2-NEXT: pand %xmm3, %xmm1 1001; SSE2-NEXT: retq 1002; 1003; SSE42-LABEL: is_positive_mask_v4i64: 1004; SSE42: # %bb.0: 1005; SSE42-NEXT: pcmpeqd %xmm4, %xmm4 1006; SSE42-NEXT: pcmpgtq %xmm4, %xmm1 1007; SSE42-NEXT: pcmpgtq %xmm4, %xmm0 1008; SSE42-NEXT: pand %xmm2, %xmm0 1009; SSE42-NEXT: pand %xmm3, %xmm1 1010; SSE42-NEXT: retq 1011; 1012; AVX1-LABEL: is_positive_mask_v4i64: 1013; AVX1: # %bb.0: 1014; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1015; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 1016; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2 1017; AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm0 1018; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1019; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0 1020; AVX1-NEXT: retq 1021; 1022; AVX2-LABEL: is_positive_mask_v4i64: 1023; AVX2: # %bb.0: 1024; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 1025; AVX2-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm0 1026; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 1027; AVX2-NEXT: retq 1028; 1029; AVX512-LABEL: is_positive_mask_v4i64: 1030; AVX512: # %bb.0: 1031; AVX512-NEXT: vpsraq $63, %ymm0, %ymm0 1032; AVX512-NEXT: vpandn %ymm1, %ymm0, %ymm0 1033; AVX512-NEXT: retq 1034 %cmp = icmp sgt <4 x i64> %x, <i64 -1, i64 -1, i64 -1, i64 -1> 1035 %mask = sext <4 x i1> %cmp to <4 x i64> 1036 %and = and <4 x i64> %mask, %y 1037 ret <4 x i64> %and 1038} 1039 1040define <8 x i32> @is_positive_mask_v8i32(<8 x i32> %x, <8 x i32> %y) { 1041; SSE-LABEL: is_positive_mask_v8i32: 1042; SSE: # %bb.0: 1043; SSE-NEXT: psrad $31, %xmm0 1044; SSE-NEXT: pandn %xmm2, %xmm0 1045; SSE-NEXT: psrad $31, %xmm1 1046; SSE-NEXT: pandn %xmm3, %xmm1 1047; SSE-NEXT: retq 1048; 1049; AVX1-LABEL: is_positive_mask_v8i32: 1050; AVX1: # %bb.0: 1051; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1052; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 1053; AVX1-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm2 1054; AVX1-NEXT: vpcmpgtd %xmm3, %xmm0, %xmm0 1055; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1056; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm0 1057; AVX1-NEXT: retq 1058; 1059; AVX2-LABEL: is_positive_mask_v8i32: 1060; AVX2: # %bb.0: 1061; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0 1062; AVX2-NEXT: vpandn %ymm1, %ymm0, %ymm0 1063; AVX2-NEXT: retq 1064; 1065; AVX512-LABEL: is_positive_mask_v8i32: 1066; AVX512: # %bb.0: 1067; AVX512-NEXT: vpsrad $31, %ymm0, %ymm0 1068; AVX512-NEXT: vpandn %ymm1, %ymm0, %ymm0 1069; AVX512-NEXT: retq 1070 %cmp = icmp sgt <8 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> 1071 %mask = sext <8 x i1> %cmp to <8 x i32> 1072 %and = and <8 x i32> %y, %mask 1073 ret <8 x i32> %and 1074} 1075 1076define <16 x i16> @is_positive_mask_v16i16(<16 x i16> %x, <16 x i16> %y) { 1077; SSE-LABEL: is_positive_mask_v16i16: 1078; SSE: # %bb.0: 1079; SSE-NEXT: psraw $15, %xmm0 1080; SSE-NEXT: pandn %xmm2, %xmm0 1081; SSE-NEXT: psraw $15, %xmm1 1082; SSE-NEXT: pandn %xmm3, %xmm1 1083; SSE-NEXT: retq 1084; 1085; AVX1-LABEL: is_positive_mask_v16i16: 1086; AVX1: # %bb.0: 1087; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1088; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 1089; AVX1-NEXT: vpcmpgtw %xmm3, %xmm2, %xmm2 1090; AVX1-NEXT: vpcmpgtw %xmm3, %xmm0, %xmm0 1091; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1092; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0 1093; AVX1-NEXT: retq 1094; 1095; AVX2-LABEL: is_positive_mask_v16i16: 1096; AVX2: # %bb.0: 1097; AVX2-NEXT: vpsraw $15, %ymm0, %ymm0 1098; AVX2-NEXT: vpandn %ymm1, %ymm0, %ymm0 1099; AVX2-NEXT: retq 1100; 1101; AVX512-LABEL: is_positive_mask_v16i16: 1102; AVX512: # %bb.0: 1103; AVX512-NEXT: vpsraw $15, %ymm0, %ymm0 1104; AVX512-NEXT: vpandn %ymm1, %ymm0, %ymm0 1105; AVX512-NEXT: retq 1106 %cmp = icmp sgt <16 x i16> %x, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> 1107 %mask = sext <16 x i1> %cmp to <16 x i16> 1108 %and = and <16 x i16> %mask, %y 1109 ret <16 x i16> %and 1110} 1111 1112define <32 x i8> @is_positive_mask_v32i8(<32 x i8> %x, <32 x i8> %y) { 1113; SSE-LABEL: is_positive_mask_v32i8: 1114; SSE: # %bb.0: 1115; SSE-NEXT: pcmpeqd %xmm4, %xmm4 1116; SSE-NEXT: pcmpgtb %xmm4, %xmm1 1117; SSE-NEXT: pcmpgtb %xmm4, %xmm0 1118; SSE-NEXT: pand %xmm2, %xmm0 1119; SSE-NEXT: pand %xmm3, %xmm1 1120; SSE-NEXT: retq 1121; 1122; AVX1-LABEL: is_positive_mask_v32i8: 1123; AVX1: # %bb.0: 1124; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1125; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 1126; AVX1-NEXT: vpcmpgtb %xmm3, %xmm2, %xmm2 1127; AVX1-NEXT: vpcmpgtb %xmm3, %xmm0, %xmm0 1128; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1129; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm0 1130; AVX1-NEXT: retq 1131; 1132; AVX2-LABEL: is_positive_mask_v32i8: 1133; AVX2: # %bb.0: 1134; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 1135; AVX2-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0 1136; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0 1137; AVX2-NEXT: retq 1138; 1139; AVX512-LABEL: is_positive_mask_v32i8: 1140; AVX512: # %bb.0: 1141; AVX512-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 1142; AVX512-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0 1143; AVX512-NEXT: vpand %ymm0, %ymm1, %ymm0 1144; AVX512-NEXT: retq 1145 %cmp = icmp sgt <32 x i8> %x, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 1146 %mask = sext <32 x i1> %cmp to <32 x i8> 1147 %and = and <32 x i8> %y, %mask 1148 ret <32 x i8> %and 1149} 1150 1151define <2 x i64> @ispositive_mask_load_v2i64(<2 x i64> %x, ptr %p) { 1152; SSE2-LABEL: ispositive_mask_load_v2i64: 1153; SSE2: # %bb.0: 1154; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 1155; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 1156; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 1157; SSE2-NEXT: pand (%rdi), %xmm0 1158; SSE2-NEXT: retq 1159; 1160; SSE42-LABEL: ispositive_mask_load_v2i64: 1161; SSE42: # %bb.0: 1162; SSE42-NEXT: pcmpeqd %xmm1, %xmm1 1163; SSE42-NEXT: pcmpgtq %xmm1, %xmm0 1164; SSE42-NEXT: pand (%rdi), %xmm0 1165; SSE42-NEXT: retq 1166; 1167; AVX1-LABEL: ispositive_mask_load_v2i64: 1168; AVX1: # %bb.0: 1169; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 1170; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 1171; AVX1-NEXT: vpand (%rdi), %xmm0, %xmm0 1172; AVX1-NEXT: retq 1173; 1174; AVX2-LABEL: ispositive_mask_load_v2i64: 1175; AVX2: # %bb.0: 1176; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 1177; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 1178; AVX2-NEXT: vpand (%rdi), %xmm0, %xmm0 1179; AVX2-NEXT: retq 1180; 1181; AVX512-LABEL: ispositive_mask_load_v2i64: 1182; AVX512: # %bb.0: 1183; AVX512-NEXT: vpsraq $63, %xmm0, %xmm0 1184; AVX512-NEXT: vpandn (%rdi), %xmm0, %xmm0 1185; AVX512-NEXT: retq 1186 %cmp = icmp sgt <2 x i64> %x, <i64 -1, i64 -1> 1187 %mask = sext <2 x i1> %cmp to <2 x i64> 1188 %y = load <2 x i64>, ptr %p 1189 %and = and <2 x i64> %mask, %y 1190 ret <2 x i64> %and 1191} 1192 1193define <4 x i32> @is_positive_mask_load_v4i32(<4 x i32> %x, ptr %p) { 1194; SSE-LABEL: is_positive_mask_load_v4i32: 1195; SSE: # %bb.0: 1196; SSE-NEXT: psrad $31, %xmm0 1197; SSE-NEXT: pandn (%rdi), %xmm0 1198; SSE-NEXT: retq 1199; 1200; AVX-LABEL: is_positive_mask_load_v4i32: 1201; AVX: # %bb.0: 1202; AVX-NEXT: vpsrad $31, %xmm0, %xmm0 1203; AVX-NEXT: vpandn (%rdi), %xmm0, %xmm0 1204; AVX-NEXT: retq 1205 %cmp = icmp sgt <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> 1206 %mask = sext <4 x i1> %cmp to <4 x i32> 1207 %y = load <4 x i32>, ptr %p 1208 %and = and <4 x i32> %y, %mask 1209 ret <4 x i32> %and 1210} 1211 1212define <8 x i16> @is_positive_mask_load_v8i16(<8 x i16> %x, ptr %p) { 1213; SSE-LABEL: is_positive_mask_load_v8i16: 1214; SSE: # %bb.0: 1215; SSE-NEXT: psraw $15, %xmm0 1216; SSE-NEXT: pandn (%rdi), %xmm0 1217; SSE-NEXT: retq 1218; 1219; AVX-LABEL: is_positive_mask_load_v8i16: 1220; AVX: # %bb.0: 1221; AVX-NEXT: vpsraw $15, %xmm0, %xmm0 1222; AVX-NEXT: vpandn (%rdi), %xmm0, %xmm0 1223; AVX-NEXT: retq 1224 %cmp = icmp sgt <8 x i16> %x, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> 1225 %mask = sext <8 x i1> %cmp to <8 x i16> 1226 %y = load <8 x i16>, ptr %p 1227 %and = and <8 x i16> %mask, %y 1228 ret <8 x i16> %and 1229} 1230 1231define <16 x i8> @is_positive_mask_load_v16i8(<16 x i8> %x, ptr %p) { 1232; SSE-LABEL: is_positive_mask_load_v16i8: 1233; SSE: # %bb.0: 1234; SSE-NEXT: pcmpeqd %xmm1, %xmm1 1235; SSE-NEXT: pcmpgtb %xmm1, %xmm0 1236; SSE-NEXT: pand (%rdi), %xmm0 1237; SSE-NEXT: retq 1238; 1239; AVX-LABEL: is_positive_mask_load_v16i8: 1240; AVX: # %bb.0: 1241; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 1242; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 1243; AVX-NEXT: vpand (%rdi), %xmm0, %xmm0 1244; AVX-NEXT: retq 1245 %cmp = icmp sgt <16 x i8> %x, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 1246 %mask = sext <16 x i1> %cmp to <16 x i8> 1247 %y = load <16 x i8>, ptr %p 1248 %and = and <16 x i8> %y, %mask 1249 ret <16 x i8> %and 1250} 1251 1252define <4 x i64> @is_positive_mask_load_v4i64(<4 x i64> %x, ptr %p) { 1253; SSE2-LABEL: is_positive_mask_load_v4i64: 1254; SSE2: # %bb.0: 1255; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1256; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 1257; SSE2-NEXT: pcmpgtd %xmm2, %xmm1 1258; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 1259; SSE2-NEXT: pcmpgtd %xmm2, %xmm0 1260; SSE2-NEXT: pand (%rdi), %xmm0 1261; SSE2-NEXT: pand 16(%rdi), %xmm1 1262; SSE2-NEXT: retq 1263; 1264; SSE42-LABEL: is_positive_mask_load_v4i64: 1265; SSE42: # %bb.0: 1266; SSE42-NEXT: pcmpeqd %xmm2, %xmm2 1267; SSE42-NEXT: pcmpgtq %xmm2, %xmm1 1268; SSE42-NEXT: pcmpgtq %xmm2, %xmm0 1269; SSE42-NEXT: pand (%rdi), %xmm0 1270; SSE42-NEXT: pand 16(%rdi), %xmm1 1271; SSE42-NEXT: retq 1272; 1273; AVX1-LABEL: is_positive_mask_load_v4i64: 1274; AVX1: # %bb.0: 1275; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1276; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 1277; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 1278; AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm0 1279; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1280; AVX1-NEXT: vandps (%rdi), %ymm0, %ymm0 1281; AVX1-NEXT: retq 1282; 1283; AVX2-LABEL: is_positive_mask_load_v4i64: 1284; AVX2: # %bb.0: 1285; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 1286; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 1287; AVX2-NEXT: vpand (%rdi), %ymm0, %ymm0 1288; AVX2-NEXT: retq 1289; 1290; AVX512-LABEL: is_positive_mask_load_v4i64: 1291; AVX512: # %bb.0: 1292; AVX512-NEXT: vpsraq $63, %ymm0, %ymm0 1293; AVX512-NEXT: vpandn (%rdi), %ymm0, %ymm0 1294; AVX512-NEXT: retq 1295 %cmp = icmp sgt <4 x i64> %x, <i64 -1, i64 -1, i64 -1, i64 -1> 1296 %mask = sext <4 x i1> %cmp to <4 x i64> 1297 %y = load <4 x i64>, ptr %p 1298 %and = and <4 x i64> %mask, %y 1299 ret <4 x i64> %and 1300} 1301 1302define <8 x i32> @is_positive_mask_load_v8i32(<8 x i32> %x, ptr %p) { 1303; SSE-LABEL: is_positive_mask_load_v8i32: 1304; SSE: # %bb.0: 1305; SSE-NEXT: psrad $31, %xmm0 1306; SSE-NEXT: pandn (%rdi), %xmm0 1307; SSE-NEXT: psrad $31, %xmm1 1308; SSE-NEXT: pandn 16(%rdi), %xmm1 1309; SSE-NEXT: retq 1310; 1311; AVX1-LABEL: is_positive_mask_load_v8i32: 1312; AVX1: # %bb.0: 1313; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1314; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 1315; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1 1316; AVX1-NEXT: vpcmpgtd %xmm2, %xmm0, %xmm0 1317; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1318; AVX1-NEXT: vandps (%rdi), %ymm0, %ymm0 1319; AVX1-NEXT: retq 1320; 1321; AVX2-LABEL: is_positive_mask_load_v8i32: 1322; AVX2: # %bb.0: 1323; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0 1324; AVX2-NEXT: vpandn (%rdi), %ymm0, %ymm0 1325; AVX2-NEXT: retq 1326; 1327; AVX512-LABEL: is_positive_mask_load_v8i32: 1328; AVX512: # %bb.0: 1329; AVX512-NEXT: vpsrad $31, %ymm0, %ymm0 1330; AVX512-NEXT: vpandn (%rdi), %ymm0, %ymm0 1331; AVX512-NEXT: retq 1332 %cmp = icmp sgt <8 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> 1333 %mask = sext <8 x i1> %cmp to <8 x i32> 1334 %y = load <8 x i32>, ptr %p 1335 %and = and <8 x i32> %y, %mask 1336 ret <8 x i32> %and 1337} 1338 1339define <16 x i16> @is_positive_mask_load_v16i16(<16 x i16> %x, ptr %p) { 1340; SSE-LABEL: is_positive_mask_load_v16i16: 1341; SSE: # %bb.0: 1342; SSE-NEXT: psraw $15, %xmm0 1343; SSE-NEXT: pandn (%rdi), %xmm0 1344; SSE-NEXT: psraw $15, %xmm1 1345; SSE-NEXT: pandn 16(%rdi), %xmm1 1346; SSE-NEXT: retq 1347; 1348; AVX1-LABEL: is_positive_mask_load_v16i16: 1349; AVX1: # %bb.0: 1350; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1351; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 1352; AVX1-NEXT: vpcmpgtw %xmm2, %xmm1, %xmm1 1353; AVX1-NEXT: vpcmpgtw %xmm2, %xmm0, %xmm0 1354; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1355; AVX1-NEXT: vandps (%rdi), %ymm0, %ymm0 1356; AVX1-NEXT: retq 1357; 1358; AVX2-LABEL: is_positive_mask_load_v16i16: 1359; AVX2: # %bb.0: 1360; AVX2-NEXT: vpsraw $15, %ymm0, %ymm0 1361; AVX2-NEXT: vpandn (%rdi), %ymm0, %ymm0 1362; AVX2-NEXT: retq 1363; 1364; AVX512-LABEL: is_positive_mask_load_v16i16: 1365; AVX512: # %bb.0: 1366; AVX512-NEXT: vpsraw $15, %ymm0, %ymm0 1367; AVX512-NEXT: vpandn (%rdi), %ymm0, %ymm0 1368; AVX512-NEXT: retq 1369 %cmp = icmp sgt <16 x i16> %x, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> 1370 %mask = sext <16 x i1> %cmp to <16 x i16> 1371 %y = load <16 x i16>, ptr %p 1372 %and = and <16 x i16> %mask, %y 1373 ret <16 x i16> %and 1374} 1375 1376define <32 x i8> @is_positive_mask_load_v32i8(<32 x i8> %x, ptr %p) { 1377; SSE-LABEL: is_positive_mask_load_v32i8: 1378; SSE: # %bb.0: 1379; SSE-NEXT: pcmpeqd %xmm2, %xmm2 1380; SSE-NEXT: pcmpgtb %xmm2, %xmm1 1381; SSE-NEXT: pcmpgtb %xmm2, %xmm0 1382; SSE-NEXT: pand (%rdi), %xmm0 1383; SSE-NEXT: pand 16(%rdi), %xmm1 1384; SSE-NEXT: retq 1385; 1386; AVX1-LABEL: is_positive_mask_load_v32i8: 1387; AVX1: # %bb.0: 1388; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1389; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 1390; AVX1-NEXT: vpcmpgtb %xmm2, %xmm1, %xmm1 1391; AVX1-NEXT: vpcmpgtb %xmm2, %xmm0, %xmm0 1392; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1393; AVX1-NEXT: vandps (%rdi), %ymm0, %ymm0 1394; AVX1-NEXT: retq 1395; 1396; AVX2-LABEL: is_positive_mask_load_v32i8: 1397; AVX2: # %bb.0: 1398; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 1399; AVX2-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 1400; AVX2-NEXT: vpand (%rdi), %ymm0, %ymm0 1401; AVX2-NEXT: retq 1402; 1403; AVX512-LABEL: is_positive_mask_load_v32i8: 1404; AVX512: # %bb.0: 1405; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 1406; AVX512-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 1407; AVX512-NEXT: vpand (%rdi), %ymm0, %ymm0 1408; AVX512-NEXT: retq 1409 %cmp = icmp sgt <32 x i8> %x, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 1410 %mask = sext <32 x i1> %cmp to <32 x i8> 1411 %y = load <32 x i8>, ptr %p 1412 %and = and <32 x i8> %y, %mask 1413 ret <32 x i8> %and 1414} 1415 1416define <2 x i1> @ispositive_mask_v2i64_v2i1(<2 x i64> %x, <2 x i1> %y) { 1417; SSE2-LABEL: ispositive_mask_v2i64_v2i1: 1418; SSE2: # %bb.0: 1419; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 1420; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 1421; SSE2-NEXT: pcmpgtd %xmm2, %xmm0 1422; SSE2-NEXT: pand %xmm1, %xmm0 1423; SSE2-NEXT: retq 1424; 1425; SSE42-LABEL: ispositive_mask_v2i64_v2i1: 1426; SSE42: # %bb.0: 1427; SSE42-NEXT: pcmpeqd %xmm2, %xmm2 1428; SSE42-NEXT: pcmpgtq %xmm2, %xmm0 1429; SSE42-NEXT: pand %xmm1, %xmm0 1430; SSE42-NEXT: retq 1431; 1432; AVX1-LABEL: ispositive_mask_v2i64_v2i1: 1433; AVX1: # %bb.0: 1434; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 1435; AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm0 1436; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 1437; AVX1-NEXT: retq 1438; 1439; AVX2-LABEL: ispositive_mask_v2i64_v2i1: 1440; AVX2: # %bb.0: 1441; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 1442; AVX2-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm0 1443; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 1444; AVX2-NEXT: retq 1445; 1446; AVX512F-LABEL: ispositive_mask_v2i64_v2i1: 1447; AVX512F: # %bb.0: 1448; AVX512F-NEXT: vpsllq $63, %xmm1, %xmm1 1449; AVX512F-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 1450; AVX512F-NEXT: vpcmpgtq %xmm2, %xmm0, %k1 1451; AVX512F-NEXT: vptestmq %xmm1, %xmm1, %k1 {%k1} 1452; AVX512F-NEXT: vmovdqa64 %xmm2, %xmm0 {%k1} {z} 1453; AVX512F-NEXT: retq 1454; 1455; AVX512DQBW-LABEL: ispositive_mask_v2i64_v2i1: 1456; AVX512DQBW: # %bb.0: 1457; AVX512DQBW-NEXT: vpsllq $63, %xmm1, %xmm1 1458; AVX512DQBW-NEXT: vpxor %xmm2, %xmm2, %xmm2 1459; AVX512DQBW-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 1460; AVX512DQBW-NEXT: vpcmpgtq %xmm3, %xmm0, %k1 1461; AVX512DQBW-NEXT: vpcmpgtq %xmm1, %xmm2, %k0 {%k1} 1462; AVX512DQBW-NEXT: vpmovm2q %k0, %xmm0 1463; AVX512DQBW-NEXT: retq 1464 %cmp = icmp sgt <2 x i64> %x, <i64 -1, i64 -1> 1465 %and = and <2 x i1> %cmp, %y 1466 ret <2 x i1> %and 1467} 1468 1469define <4 x i1> @is_positive_mask_v4i32_v4i1(<4 x i32> %x, <4 x i1> %y) { 1470; SSE-LABEL: is_positive_mask_v4i32_v4i1: 1471; SSE: # %bb.0: 1472; SSE-NEXT: psrad $31, %xmm0 1473; SSE-NEXT: pandn %xmm1, %xmm0 1474; SSE-NEXT: retq 1475; 1476; AVX1-LABEL: is_positive_mask_v4i32_v4i1: 1477; AVX1: # %bb.0: 1478; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0 1479; AVX1-NEXT: vpandn %xmm1, %xmm0, %xmm0 1480; AVX1-NEXT: retq 1481; 1482; AVX2-LABEL: is_positive_mask_v4i32_v4i1: 1483; AVX2: # %bb.0: 1484; AVX2-NEXT: vpsrad $31, %xmm0, %xmm0 1485; AVX2-NEXT: vpandn %xmm1, %xmm0, %xmm0 1486; AVX2-NEXT: retq 1487; 1488; AVX512F-LABEL: is_positive_mask_v4i32_v4i1: 1489; AVX512F: # %bb.0: 1490; AVX512F-NEXT: vpslld $31, %xmm1, %xmm1 1491; AVX512F-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 1492; AVX512F-NEXT: vpcmpgtd %xmm2, %xmm0, %k1 1493; AVX512F-NEXT: vptestmd %xmm1, %xmm1, %k1 {%k1} 1494; AVX512F-NEXT: vmovdqa32 %xmm2, %xmm0 {%k1} {z} 1495; AVX512F-NEXT: retq 1496; 1497; AVX512DQBW-LABEL: is_positive_mask_v4i32_v4i1: 1498; AVX512DQBW: # %bb.0: 1499; AVX512DQBW-NEXT: vpslld $31, %xmm1, %xmm1 1500; AVX512DQBW-NEXT: vpmovd2m %xmm1, %k1 1501; AVX512DQBW-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 1502; AVX512DQBW-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1} 1503; AVX512DQBW-NEXT: vpmovm2d %k0, %xmm0 1504; AVX512DQBW-NEXT: retq 1505 %cmp = icmp sgt <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> 1506 %and = and <4 x i1> %y, %cmp 1507 ret <4 x i1> %and 1508} 1509 1510define <8 x i1> @is_positive_mask_v8i16_v8i1(<8 x i16> %x, <8 x i1> %y) { 1511; SSE-LABEL: is_positive_mask_v8i16_v8i1: 1512; SSE: # %bb.0: 1513; SSE-NEXT: psraw $15, %xmm0 1514; SSE-NEXT: pandn %xmm1, %xmm0 1515; SSE-NEXT: retq 1516; 1517; AVX1-LABEL: is_positive_mask_v8i16_v8i1: 1518; AVX1: # %bb.0: 1519; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0 1520; AVX1-NEXT: vpandn %xmm1, %xmm0, %xmm0 1521; AVX1-NEXT: retq 1522; 1523; AVX2-LABEL: is_positive_mask_v8i16_v8i1: 1524; AVX2: # %bb.0: 1525; AVX2-NEXT: vpsraw $15, %xmm0, %xmm0 1526; AVX2-NEXT: vpandn %xmm1, %xmm0, %xmm0 1527; AVX2-NEXT: retq 1528; 1529; AVX512F-LABEL: is_positive_mask_v8i16_v8i1: 1530; AVX512F: # %bb.0: 1531; AVX512F-NEXT: vpsraw $15, %xmm0, %xmm0 1532; AVX512F-NEXT: vpandn %xmm1, %xmm0, %xmm0 1533; AVX512F-NEXT: retq 1534; 1535; AVX512DQBW-LABEL: is_positive_mask_v8i16_v8i1: 1536; AVX512DQBW: # %bb.0: 1537; AVX512DQBW-NEXT: vpsllw $15, %xmm1, %xmm1 1538; AVX512DQBW-NEXT: vpxor %xmm2, %xmm2, %xmm2 1539; AVX512DQBW-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 1540; AVX512DQBW-NEXT: vpcmpgtw %xmm3, %xmm0, %k1 1541; AVX512DQBW-NEXT: vpcmpgtw %xmm1, %xmm2, %k0 {%k1} 1542; AVX512DQBW-NEXT: vpmovm2w %k0, %xmm0 1543; AVX512DQBW-NEXT: retq 1544 %cmp = icmp sgt <8 x i16> %x, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> 1545 %and = and <8 x i1> %cmp, %y 1546 ret <8 x i1> %and 1547} 1548 1549define <16 x i1> @is_positive_mask_v16i8_v16i1(<16 x i8> %x, <16 x i1> %y) { 1550; SSE-LABEL: is_positive_mask_v16i8_v16i1: 1551; SSE: # %bb.0: 1552; SSE-NEXT: pcmpeqd %xmm2, %xmm2 1553; SSE-NEXT: pcmpgtb %xmm2, %xmm0 1554; SSE-NEXT: pand %xmm1, %xmm0 1555; SSE-NEXT: retq 1556; 1557; AVX1-LABEL: is_positive_mask_v16i8_v16i1: 1558; AVX1: # %bb.0: 1559; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 1560; AVX1-NEXT: vpcmpgtb %xmm2, %xmm0, %xmm0 1561; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0 1562; AVX1-NEXT: retq 1563; 1564; AVX2-LABEL: is_positive_mask_v16i8_v16i1: 1565; AVX2: # %bb.0: 1566; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 1567; AVX2-NEXT: vpcmpgtb %xmm2, %xmm0, %xmm0 1568; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0 1569; AVX2-NEXT: retq 1570; 1571; AVX512F-LABEL: is_positive_mask_v16i8_v16i1: 1572; AVX512F: # %bb.0: 1573; AVX512F-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 1574; AVX512F-NEXT: vpcmpgtb %xmm2, %xmm0, %xmm0 1575; AVX512F-NEXT: vpand %xmm0, %xmm1, %xmm0 1576; AVX512F-NEXT: retq 1577; 1578; AVX512DQBW-LABEL: is_positive_mask_v16i8_v16i1: 1579; AVX512DQBW: # %bb.0: 1580; AVX512DQBW-NEXT: vpsllw $7, %xmm1, %xmm1 1581; AVX512DQBW-NEXT: vpmovb2m %xmm1, %k1 1582; AVX512DQBW-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 1583; AVX512DQBW-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 {%k1} 1584; AVX512DQBW-NEXT: vpmovm2b %k0, %xmm0 1585; AVX512DQBW-NEXT: retq 1586 %cmp = icmp sgt <16 x i8> %x, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 1587 %and = and <16 x i1> %y, %cmp 1588 ret <16 x i1> %and 1589} 1590 1591define <4 x i1> @is_positive_mask_v4i64_v4i1(<4 x i64> %x, <4 x i1> %y) { 1592; SSE2-LABEL: is_positive_mask_v4i64_v4i1: 1593; SSE2: # %bb.0: 1594; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3] 1595; SSE2-NEXT: psrad $31, %xmm0 1596; SSE2-NEXT: pandn %xmm2, %xmm0 1597; SSE2-NEXT: retq 1598; 1599; SSE42-LABEL: is_positive_mask_v4i64_v4i1: 1600; SSE42: # %bb.0: 1601; SSE42-NEXT: pcmpeqd %xmm3, %xmm3 1602; SSE42-NEXT: pcmpgtq %xmm3, %xmm1 1603; SSE42-NEXT: pcmpgtq %xmm3, %xmm0 1604; SSE42-NEXT: packssdw %xmm1, %xmm0 1605; SSE42-NEXT: pand %xmm2, %xmm0 1606; SSE42-NEXT: retq 1607; 1608; AVX1-LABEL: is_positive_mask_v4i64_v4i1: 1609; AVX1: # %bb.0: 1610; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1611; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 1612; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2 1613; AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm0 1614; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 1615; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 1616; AVX1-NEXT: vzeroupper 1617; AVX1-NEXT: retq 1618; 1619; AVX2-LABEL: is_positive_mask_v4i64_v4i1: 1620; AVX2: # %bb.0: 1621; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 1622; AVX2-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm0 1623; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 1624; AVX2-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 1625; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 1626; AVX2-NEXT: vzeroupper 1627; AVX2-NEXT: retq 1628; 1629; AVX512F-LABEL: is_positive_mask_v4i64_v4i1: 1630; AVX512F: # %bb.0: 1631; AVX512F-NEXT: vpslld $31, %xmm1, %xmm1 1632; AVX512F-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 1633; AVX512F-NEXT: vpcmpgtq %ymm2, %ymm0, %k1 1634; AVX512F-NEXT: vptestmd %xmm1, %xmm1, %k1 {%k1} 1635; AVX512F-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 1636; AVX512F-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 1637; AVX512F-NEXT: vzeroupper 1638; AVX512F-NEXT: retq 1639; 1640; AVX512DQBW-LABEL: is_positive_mask_v4i64_v4i1: 1641; AVX512DQBW: # %bb.0: 1642; AVX512DQBW-NEXT: vpslld $31, %xmm1, %xmm1 1643; AVX512DQBW-NEXT: vpxor %xmm2, %xmm2, %xmm2 1644; AVX512DQBW-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3 1645; AVX512DQBW-NEXT: vpcmpgtq %ymm3, %ymm0, %k1 1646; AVX512DQBW-NEXT: vpcmpgtd %xmm1, %xmm2, %k0 {%k1} 1647; AVX512DQBW-NEXT: vpmovm2d %k0, %xmm0 1648; AVX512DQBW-NEXT: vzeroupper 1649; AVX512DQBW-NEXT: retq 1650 %cmp = icmp sgt <4 x i64> %x, <i64 -1, i64 -1, i64 -1, i64 -1> 1651 %and = and <4 x i1> %cmp, %y 1652 ret <4 x i1> %and 1653} 1654 1655define <8 x i1> @is_positive_mask_v8i32_v8i1(<8 x i32> %x, <8 x i1> %y) { 1656; SSE-LABEL: is_positive_mask_v8i32_v8i1: 1657; SSE: # %bb.0: 1658; SSE-NEXT: pcmpeqd %xmm3, %xmm3 1659; SSE-NEXT: pcmpgtd %xmm3, %xmm1 1660; SSE-NEXT: pcmpgtd %xmm3, %xmm0 1661; SSE-NEXT: packssdw %xmm1, %xmm0 1662; SSE-NEXT: pand %xmm2, %xmm0 1663; SSE-NEXT: retq 1664; 1665; AVX1-LABEL: is_positive_mask_v8i32_v8i1: 1666; AVX1: # %bb.0: 1667; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1668; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 1669; AVX1-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm2 1670; AVX1-NEXT: vpcmpgtd %xmm3, %xmm0, %xmm0 1671; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 1672; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0 1673; AVX1-NEXT: vzeroupper 1674; AVX1-NEXT: retq 1675; 1676; AVX2-LABEL: is_positive_mask_v8i32_v8i1: 1677; AVX2: # %bb.0: 1678; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 1679; AVX2-NEXT: vpcmpgtd %ymm2, %ymm0, %ymm0 1680; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 1681; AVX2-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 1682; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0 1683; AVX2-NEXT: vzeroupper 1684; AVX2-NEXT: retq 1685; 1686; AVX512F-LABEL: is_positive_mask_v8i32_v8i1: 1687; AVX512F: # %bb.0: 1688; AVX512F-NEXT: vpmovsxwd %xmm1, %ymm1 1689; AVX512F-NEXT: vpslld $31, %ymm1, %ymm1 1690; AVX512F-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 1691; AVX512F-NEXT: vpcmpgtd %ymm2, %ymm0, %k1 1692; AVX512F-NEXT: vptestmd %ymm1, %ymm1, %k1 {%k1} 1693; AVX512F-NEXT: vmovdqa32 %ymm2, %ymm0 {%k1} {z} 1694; AVX512F-NEXT: vpmovdw %ymm0, %xmm0 1695; AVX512F-NEXT: vzeroupper 1696; AVX512F-NEXT: retq 1697; 1698; AVX512DQBW-LABEL: is_positive_mask_v8i32_v8i1: 1699; AVX512DQBW: # %bb.0: 1700; AVX512DQBW-NEXT: vpsllw $15, %xmm1, %xmm1 1701; AVX512DQBW-NEXT: vpmovw2m %xmm1, %k1 1702; AVX512DQBW-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 1703; AVX512DQBW-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1} 1704; AVX512DQBW-NEXT: vpmovm2w %k0, %xmm0 1705; AVX512DQBW-NEXT: vzeroupper 1706; AVX512DQBW-NEXT: retq 1707 %cmp = icmp sgt <8 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> 1708 %and = and <8 x i1> %y, %cmp 1709 ret <8 x i1> %and 1710} 1711 1712define <16 x i1> @is_positive_mask_v16i16_v16i1(<16 x i16> %x, <16 x i1> %y) { 1713; SSE-LABEL: is_positive_mask_v16i16_v16i1: 1714; SSE: # %bb.0: 1715; SSE-NEXT: pcmpeqd %xmm3, %xmm3 1716; SSE-NEXT: pcmpgtw %xmm3, %xmm1 1717; SSE-NEXT: pcmpgtw %xmm3, %xmm0 1718; SSE-NEXT: packsswb %xmm1, %xmm0 1719; SSE-NEXT: pand %xmm2, %xmm0 1720; SSE-NEXT: retq 1721; 1722; AVX1-LABEL: is_positive_mask_v16i16_v16i1: 1723; AVX1: # %bb.0: 1724; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1725; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 1726; AVX1-NEXT: vpcmpgtw %xmm3, %xmm2, %xmm2 1727; AVX1-NEXT: vpcmpgtw %xmm3, %xmm0, %xmm0 1728; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0 1729; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 1730; AVX1-NEXT: vzeroupper 1731; AVX1-NEXT: retq 1732; 1733; AVX2-LABEL: is_positive_mask_v16i16_v16i1: 1734; AVX2: # %bb.0: 1735; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 1736; AVX2-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0 1737; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 1738; AVX2-NEXT: vpacksswb %xmm2, %xmm0, %xmm0 1739; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 1740; AVX2-NEXT: vzeroupper 1741; AVX2-NEXT: retq 1742; 1743; AVX512F-LABEL: is_positive_mask_v16i16_v16i1: 1744; AVX512F: # %bb.0: 1745; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1 1746; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1 1747; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k1 1748; AVX512F-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 1749; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 1750; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0 1751; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} 1752; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1753; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 1754; AVX512F-NEXT: vzeroupper 1755; AVX512F-NEXT: retq 1756; 1757; AVX512DQBW-LABEL: is_positive_mask_v16i16_v16i1: 1758; AVX512DQBW: # %bb.0: 1759; AVX512DQBW-NEXT: vpsllw $7, %xmm1, %xmm1 1760; AVX512DQBW-NEXT: vpxor %xmm2, %xmm2, %xmm2 1761; AVX512DQBW-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3 1762; AVX512DQBW-NEXT: vpcmpgtw %ymm3, %ymm0, %k1 1763; AVX512DQBW-NEXT: vpcmpgtb %xmm1, %xmm2, %k0 {%k1} 1764; AVX512DQBW-NEXT: vpmovm2b %k0, %xmm0 1765; AVX512DQBW-NEXT: vzeroupper 1766; AVX512DQBW-NEXT: retq 1767 %cmp = icmp sgt <16 x i16> %x, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> 1768 %and = and <16 x i1> %cmp, %y 1769 ret <16 x i1> %and 1770} 1771 1772define <32 x i1> @is_positive_mask_v32i8_v32i1(<32 x i8> %x, <32 x i1> %y) { 1773; SSE2-LABEL: is_positive_mask_v32i8_v32i1: 1774; SSE2: # %bb.0: 1775; SSE2-NEXT: movq %rdi, %rax 1776; SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero 1777; SSE2-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero 1778; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 1779; SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero 1780; SSE2-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero 1781; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7] 1782; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] 1783; SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero 1784; SSE2-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero 1785; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 1786; SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero 1787; SSE2-NEXT: movd {{.*#+}} xmm5 = mem[0],zero,zero,zero 1788; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3],xmm5[4],xmm2[4],xmm5[5],xmm2[5],xmm5[6],xmm2[6],xmm5[7],xmm2[7] 1789; SSE2-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1],xmm5[2],xmm3[2],xmm5[3],xmm3[3] 1790; SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1] 1791; SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero 1792; SSE2-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero 1793; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 1794; SSE2-NEXT: movd %r9d, %xmm4 1795; SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero 1796; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7] 1797; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] 1798; SSE2-NEXT: movd %r8d, %xmm2 1799; SSE2-NEXT: movd %ecx, %xmm3 1800; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 1801; SSE2-NEXT: movd %edx, %xmm6 1802; SSE2-NEXT: movd %esi, %xmm2 1803; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm6[0],xmm2[1],xmm6[1],xmm2[2],xmm6[2],xmm2[3],xmm6[3],xmm2[4],xmm6[4],xmm2[5],xmm6[5],xmm2[6],xmm6[6],xmm2[7],xmm6[7] 1804; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3] 1805; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 1806; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm5[0] 1807; SSE2-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero 1808; SSE2-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero 1809; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] 1810; SSE2-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero 1811; SSE2-NEXT: movd {{.*#+}} xmm5 = mem[0],zero,zero,zero 1812; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1],xmm5[2],xmm3[2],xmm5[3],xmm3[3],xmm5[4],xmm3[4],xmm5[5],xmm3[5],xmm5[6],xmm3[6],xmm5[7],xmm3[7] 1813; SSE2-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3] 1814; SSE2-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero 1815; SSE2-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero 1816; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] 1817; SSE2-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero 1818; SSE2-NEXT: movd {{.*#+}} xmm6 = mem[0],zero,zero,zero 1819; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm3[0],xmm6[1],xmm3[1],xmm6[2],xmm3[2],xmm6[3],xmm3[3],xmm6[4],xmm3[4],xmm6[5],xmm3[5],xmm6[6],xmm3[6],xmm6[7],xmm3[7] 1820; SSE2-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm4[0],xmm6[1],xmm4[1],xmm6[2],xmm4[2],xmm6[3],xmm4[3] 1821; SSE2-NEXT: punpckldq {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1] 1822; SSE2-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero 1823; SSE2-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero 1824; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] 1825; SSE2-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero 1826; SSE2-NEXT: movd {{.*#+}} xmm5 = mem[0],zero,zero,zero 1827; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1],xmm5[2],xmm3[2],xmm5[3],xmm3[3],xmm5[4],xmm3[4],xmm5[5],xmm3[5],xmm5[6],xmm3[6],xmm5[7],xmm3[7] 1828; SSE2-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3] 1829; SSE2-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero 1830; SSE2-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero 1831; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] 1832; SSE2-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero 1833; SSE2-NEXT: movd {{.*#+}} xmm7 = mem[0],zero,zero,zero 1834; SSE2-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm3[0],xmm7[1],xmm3[1],xmm7[2],xmm3[2],xmm7[3],xmm3[3],xmm7[4],xmm3[4],xmm7[5],xmm3[5],xmm7[6],xmm3[6],xmm7[7],xmm3[7] 1835; SSE2-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm4[0],xmm7[1],xmm4[1],xmm7[2],xmm4[2],xmm7[3],xmm4[3] 1836; SSE2-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm5[0],xmm7[1],xmm5[1] 1837; SSE2-NEXT: punpcklqdq {{.*#+}} xmm7 = xmm7[0],xmm6[0] 1838; SSE2-NEXT: pcmpeqd %xmm3, %xmm3 1839; SSE2-NEXT: pcmpgtb %xmm3, %xmm0 1840; SSE2-NEXT: pand %xmm2, %xmm0 1841; SSE2-NEXT: pcmpgtb %xmm3, %xmm1 1842; SSE2-NEXT: pand %xmm7, %xmm1 1843; SSE2-NEXT: psllw $7, %xmm1 1844; SSE2-NEXT: pmovmskb %xmm1, %ecx 1845; SSE2-NEXT: shll $16, %ecx 1846; SSE2-NEXT: psllw $7, %xmm0 1847; SSE2-NEXT: pmovmskb %xmm0, %edx 1848; SSE2-NEXT: orl %ecx, %edx 1849; SSE2-NEXT: movl %edx, (%rdi) 1850; SSE2-NEXT: retq 1851; 1852; SSE42-LABEL: is_positive_mask_v32i8_v32i1: 1853; SSE42: # %bb.0: 1854; SSE42-NEXT: movq %rdi, %rax 1855; SSE42-NEXT: movd %esi, %xmm2 1856; SSE42-NEXT: pinsrb $1, %edx, %xmm2 1857; SSE42-NEXT: pinsrb $2, %ecx, %xmm2 1858; SSE42-NEXT: pinsrb $3, %r8d, %xmm2 1859; SSE42-NEXT: pinsrb $4, %r9d, %xmm2 1860; SSE42-NEXT: pinsrb $5, {{[0-9]+}}(%rsp), %xmm2 1861; SSE42-NEXT: pinsrb $6, {{[0-9]+}}(%rsp), %xmm2 1862; SSE42-NEXT: pinsrb $7, {{[0-9]+}}(%rsp), %xmm2 1863; SSE42-NEXT: pinsrb $8, {{[0-9]+}}(%rsp), %xmm2 1864; SSE42-NEXT: pinsrb $9, {{[0-9]+}}(%rsp), %xmm2 1865; SSE42-NEXT: pinsrb $10, {{[0-9]+}}(%rsp), %xmm2 1866; SSE42-NEXT: pinsrb $11, {{[0-9]+}}(%rsp), %xmm2 1867; SSE42-NEXT: pinsrb $12, {{[0-9]+}}(%rsp), %xmm2 1868; SSE42-NEXT: pinsrb $13, {{[0-9]+}}(%rsp), %xmm2 1869; SSE42-NEXT: pinsrb $14, {{[0-9]+}}(%rsp), %xmm2 1870; SSE42-NEXT: pinsrb $15, {{[0-9]+}}(%rsp), %xmm2 1871; SSE42-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero 1872; SSE42-NEXT: pinsrb $1, {{[0-9]+}}(%rsp), %xmm3 1873; SSE42-NEXT: pinsrb $2, {{[0-9]+}}(%rsp), %xmm3 1874; SSE42-NEXT: pinsrb $3, {{[0-9]+}}(%rsp), %xmm3 1875; SSE42-NEXT: pinsrb $4, {{[0-9]+}}(%rsp), %xmm3 1876; SSE42-NEXT: pinsrb $5, {{[0-9]+}}(%rsp), %xmm3 1877; SSE42-NEXT: pinsrb $6, {{[0-9]+}}(%rsp), %xmm3 1878; SSE42-NEXT: pinsrb $7, {{[0-9]+}}(%rsp), %xmm3 1879; SSE42-NEXT: pinsrb $8, {{[0-9]+}}(%rsp), %xmm3 1880; SSE42-NEXT: pinsrb $9, {{[0-9]+}}(%rsp), %xmm3 1881; SSE42-NEXT: pinsrb $10, {{[0-9]+}}(%rsp), %xmm3 1882; SSE42-NEXT: pinsrb $11, {{[0-9]+}}(%rsp), %xmm3 1883; SSE42-NEXT: pinsrb $12, {{[0-9]+}}(%rsp), %xmm3 1884; SSE42-NEXT: pinsrb $13, {{[0-9]+}}(%rsp), %xmm3 1885; SSE42-NEXT: pinsrb $14, {{[0-9]+}}(%rsp), %xmm3 1886; SSE42-NEXT: pinsrb $15, {{[0-9]+}}(%rsp), %xmm3 1887; SSE42-NEXT: pcmpeqd %xmm4, %xmm4 1888; SSE42-NEXT: pcmpgtb %xmm4, %xmm1 1889; SSE42-NEXT: pand %xmm3, %xmm1 1890; SSE42-NEXT: pcmpgtb %xmm4, %xmm0 1891; SSE42-NEXT: pand %xmm2, %xmm0 1892; SSE42-NEXT: psllw $7, %xmm0 1893; SSE42-NEXT: pmovmskb %xmm0, %ecx 1894; SSE42-NEXT: psllw $7, %xmm1 1895; SSE42-NEXT: pmovmskb %xmm1, %edx 1896; SSE42-NEXT: shll $16, %edx 1897; SSE42-NEXT: orl %ecx, %edx 1898; SSE42-NEXT: movl %edx, (%rdi) 1899; SSE42-NEXT: retq 1900; 1901; AVX1-LABEL: is_positive_mask_v32i8_v32i1: 1902; AVX1: # %bb.0: 1903; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1904; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 1905; AVX1-NEXT: vpcmpgtb %xmm3, %xmm2, %xmm2 1906; AVX1-NEXT: vpcmpgtb %xmm3, %xmm0, %xmm0 1907; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1908; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm0 1909; AVX1-NEXT: retq 1910; 1911; AVX2-LABEL: is_positive_mask_v32i8_v32i1: 1912; AVX2: # %bb.0: 1913; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 1914; AVX2-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0 1915; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0 1916; AVX2-NEXT: retq 1917; 1918; AVX512F-LABEL: is_positive_mask_v32i8_v32i1: 1919; AVX512F: # %bb.0: 1920; AVX512F-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 1921; AVX512F-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0 1922; AVX512F-NEXT: vpand %ymm0, %ymm1, %ymm0 1923; AVX512F-NEXT: retq 1924; 1925; AVX512DQBW-LABEL: is_positive_mask_v32i8_v32i1: 1926; AVX512DQBW: # %bb.0: 1927; AVX512DQBW-NEXT: vpsllw $7, %ymm1, %ymm1 1928; AVX512DQBW-NEXT: vpmovb2m %ymm1, %k1 1929; AVX512DQBW-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 1930; AVX512DQBW-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 {%k1} 1931; AVX512DQBW-NEXT: vpmovm2b %k0, %ymm0 1932; AVX512DQBW-NEXT: retq 1933 %cmp = icmp sgt <32 x i8> %x, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 1934 %and = and <32 x i1> %y, %cmp 1935 ret <32 x i1> %and 1936} 1937 1938define <4 x i64> @PR52504(<4 x i16> %t3) { 1939; SSE2-LABEL: PR52504: 1940; SSE2: # %bb.0: 1941; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1942; SSE2-NEXT: psrad $16, %xmm1 1943; SSE2-NEXT: pxor %xmm2, %xmm2 1944; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 1945; SSE2-NEXT: movdqa %xmm1, %xmm3 1946; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm2[2],xmm3[3],xmm2[3] 1947; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1948; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,1,1] 1949; SSE2-NEXT: pcmpeqd %xmm4, %xmm4 1950; SSE2-NEXT: pcmpgtd %xmm4, %xmm0 1951; SSE2-NEXT: pand %xmm0, %xmm1 1952; SSE2-NEXT: pxor %xmm4, %xmm0 1953; SSE2-NEXT: por %xmm1, %xmm0 1954; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,2,3,3] 1955; SSE2-NEXT: pcmpgtd %xmm4, %xmm1 1956; SSE2-NEXT: pand %xmm1, %xmm3 1957; SSE2-NEXT: pxor %xmm4, %xmm1 1958; SSE2-NEXT: por %xmm3, %xmm1 1959; SSE2-NEXT: retq 1960; 1961; SSE42-LABEL: PR52504: 1962; SSE42: # %bb.0: 1963; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1964; SSE42-NEXT: pmovsxwq %xmm1, %xmm2 1965; SSE42-NEXT: pmovsxwq %xmm0, %xmm3 1966; SSE42-NEXT: pxor %xmm1, %xmm1 1967; SSE42-NEXT: pxor %xmm0, %xmm0 1968; SSE42-NEXT: pcmpgtq %xmm3, %xmm0 1969; SSE42-NEXT: por %xmm3, %xmm0 1970; SSE42-NEXT: pcmpgtq %xmm2, %xmm1 1971; SSE42-NEXT: por %xmm2, %xmm1 1972; SSE42-NEXT: retq 1973; 1974; AVX1-LABEL: PR52504: 1975; AVX1: # %bb.0: 1976; AVX1-NEXT: vpmovsxwq %xmm0, %xmm1 1977; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 1978; AVX1-NEXT: vpmovsxwq %xmm0, %xmm0 1979; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 1980; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm3 1981; AVX1-NEXT: vpor %xmm0, %xmm3, %xmm0 1982; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm2 1983; AVX1-NEXT: vpor %xmm1, %xmm2, %xmm1 1984; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1985; AVX1-NEXT: retq 1986; 1987; AVX2-LABEL: PR52504: 1988; AVX2: # %bb.0: 1989; AVX2-NEXT: vpmovsxwq %xmm0, %ymm0 1990; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 1991; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm1 1992; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0 1993; AVX2-NEXT: retq 1994; 1995; AVX512-LABEL: PR52504: 1996; AVX512: # %bb.0: 1997; AVX512-NEXT: vpmovsxwq %xmm0, %ymm0 1998; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 1999; AVX512-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0 2000; AVX512-NEXT: retq 2001 %t14 = sext <4 x i16> %t3 to <4 x i64> 2002 %t15 = icmp sgt <4 x i64> %t14, <i64 -1, i64 -1, i64 -1, i64 -1> 2003 %t16 = select <4 x i1> %t15, <4 x i64> %t14, <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1> 2004 ret <4 x i64> %t16 2005} 2006