1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2 3; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42 4; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX1 5; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX2 6; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512 7 8define i64 @test_v2f64_sext(<2 x double> %a0, <2 x double> %a1) { 9; SSE-LABEL: test_v2f64_sext: 10; SSE: # %bb.0: 11; SSE-NEXT: cmpltpd %xmm0, %xmm1 12; SSE-NEXT: movmskpd %xmm1, %ecx 13; SSE-NEXT: xorl %eax, %eax 14; SSE-NEXT: cmpl $3, %ecx 15; SSE-NEXT: sete %al 16; SSE-NEXT: negq %rax 17; SSE-NEXT: retq 18; 19; AVX-LABEL: test_v2f64_sext: 20; AVX: # %bb.0: 21; AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 22; AVX-NEXT: xorl %eax, %eax 23; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 24; AVX-NEXT: vtestpd %xmm1, %xmm0 25; AVX-NEXT: sbbq %rax, %rax 26; AVX-NEXT: retq 27 %c = fcmp ogt <2 x double> %a0, %a1 28 %s = sext <2 x i1> %c to <2 x i64> 29 %1 = shufflevector <2 x i64> %s, <2 x i64> undef, <2 x i32> <i32 1, i32 undef> 30 %2 = and <2 x i64> %s, %1 31 %3 = extractelement <2 x i64> %2, i32 0 32 ret i64 %3 33} 34 35define i64 @test_v4f64_sext(<4 x double> %a0, <4 x double> %a1) { 36; SSE-LABEL: test_v4f64_sext: 37; SSE: # %bb.0: 38; SSE-NEXT: cmpltpd %xmm1, %xmm3 39; SSE-NEXT: cmpltpd %xmm0, %xmm2 40; SSE-NEXT: andpd %xmm3, %xmm2 41; SSE-NEXT: movmskpd %xmm2, %ecx 42; SSE-NEXT: xorl %eax, %eax 43; SSE-NEXT: cmpl $3, %ecx 44; SSE-NEXT: sete %al 45; SSE-NEXT: negq %rax 46; SSE-NEXT: retq 47; 48; AVX1-LABEL: test_v4f64_sext: 49; AVX1: # %bb.0: 50; AVX1-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0 51; AVX1-NEXT: xorl %eax, %eax 52; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1 53; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1 54; AVX1-NEXT: vtestpd %ymm1, %ymm0 55; AVX1-NEXT: sbbq %rax, %rax 56; AVX1-NEXT: vzeroupper 57; AVX1-NEXT: retq 58; 59; AVX2-LABEL: test_v4f64_sext: 60; AVX2: # %bb.0: 61; AVX2-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0 62; AVX2-NEXT: xorl %eax, %eax 63; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 64; AVX2-NEXT: vtestpd %ymm1, %ymm0 65; AVX2-NEXT: sbbq %rax, %rax 66; AVX2-NEXT: vzeroupper 67; AVX2-NEXT: retq 68; 69; AVX512-LABEL: test_v4f64_sext: 70; AVX512: # %bb.0: 71; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0 72; AVX512-NEXT: xorl %eax, %eax 73; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 74; AVX512-NEXT: vtestpd %ymm1, %ymm0 75; AVX512-NEXT: sbbq %rax, %rax 76; AVX512-NEXT: vzeroupper 77; AVX512-NEXT: retq 78 %c = fcmp ogt <4 x double> %a0, %a1 79 %s = sext <4 x i1> %c to <4 x i64> 80 %1 = shufflevector <4 x i64> %s, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 81 %2 = and <4 x i64> %s, %1 82 %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 83 %4 = and <4 x i64> %2, %3 84 %5 = extractelement <4 x i64> %4, i64 0 85 ret i64 %5 86} 87 88define i64 @test_v4f64_legal_sext(<4 x double> %a0, <4 x double> %a1) { 89; SSE-LABEL: test_v4f64_legal_sext: 90; SSE: # %bb.0: 91; SSE-NEXT: cmpltpd %xmm1, %xmm3 92; SSE-NEXT: cmpltpd %xmm0, %xmm2 93; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2] 94; SSE-NEXT: movmskps %xmm2, %ecx 95; SSE-NEXT: xorl %eax, %eax 96; SSE-NEXT: cmpl $15, %ecx 97; SSE-NEXT: sete %al 98; SSE-NEXT: negq %rax 99; SSE-NEXT: retq 100; 101; AVX1OR2-LABEL: test_v4f64_legal_sext: 102; AVX1OR2: # %bb.0: 103; AVX1OR2-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0 104; AVX1OR2-NEXT: vextractf128 $1, %ymm0, %xmm1 105; AVX1OR2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 106; AVX1OR2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 107; AVX1OR2-NEXT: xorl %eax, %eax 108; AVX1OR2-NEXT: vtestps %xmm1, %xmm0 109; AVX1OR2-NEXT: sbbq %rax, %rax 110; AVX1OR2-NEXT: vzeroupper 111; AVX1OR2-NEXT: retq 112; 113; AVX512-LABEL: test_v4f64_legal_sext: 114; AVX512: # %bb.0: 115; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %k1 116; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 117; AVX512-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} {z} 118; AVX512-NEXT: xorl %eax, %eax 119; AVX512-NEXT: vtestps %xmm0, %xmm1 120; AVX512-NEXT: sbbq %rax, %rax 121; AVX512-NEXT: vzeroupper 122; AVX512-NEXT: retq 123 %c = fcmp ogt <4 x double> %a0, %a1 124 %s = sext <4 x i1> %c to <4 x i32> 125 %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 126 %2 = and <4 x i32> %s, %1 127 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 128 %4 = and <4 x i32> %2, %3 129 %5 = extractelement <4 x i32> %4, i64 0 130 %6 = sext i32 %5 to i64 131 ret i64 %6 132} 133 134define i32 @test_v4f32_sext(<4 x float> %a0, <4 x float> %a1) { 135; SSE-LABEL: test_v4f32_sext: 136; SSE: # %bb.0: 137; SSE-NEXT: cmpltps %xmm0, %xmm1 138; SSE-NEXT: movmskps %xmm1, %ecx 139; SSE-NEXT: xorl %eax, %eax 140; SSE-NEXT: cmpl $15, %ecx 141; SSE-NEXT: sete %al 142; SSE-NEXT: negl %eax 143; SSE-NEXT: retq 144; 145; AVX-LABEL: test_v4f32_sext: 146; AVX: # %bb.0: 147; AVX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 148; AVX-NEXT: xorl %eax, %eax 149; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 150; AVX-NEXT: vtestps %xmm1, %xmm0 151; AVX-NEXT: sbbl %eax, %eax 152; AVX-NEXT: retq 153 %c = fcmp ogt <4 x float> %a0, %a1 154 %s = sext <4 x i1> %c to <4 x i32> 155 %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 156 %2 = and <4 x i32> %s, %1 157 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 158 %4 = and <4 x i32> %2, %3 159 %5 = extractelement <4 x i32> %4, i32 0 160 ret i32 %5 161} 162 163define i32 @test_v8f32_sext(<8 x float> %a0, <8 x float> %a1) { 164; SSE-LABEL: test_v8f32_sext: 165; SSE: # %bb.0: 166; SSE-NEXT: cmpltps %xmm1, %xmm3 167; SSE-NEXT: cmpltps %xmm0, %xmm2 168; SSE-NEXT: andps %xmm3, %xmm2 169; SSE-NEXT: movmskps %xmm2, %ecx 170; SSE-NEXT: xorl %eax, %eax 171; SSE-NEXT: cmpl $15, %ecx 172; SSE-NEXT: sete %al 173; SSE-NEXT: negl %eax 174; SSE-NEXT: retq 175; 176; AVX1-LABEL: test_v8f32_sext: 177; AVX1: # %bb.0: 178; AVX1-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 179; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 180; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1 181; AVX1-NEXT: xorl %eax, %eax 182; AVX1-NEXT: vtestps %ymm1, %ymm0 183; AVX1-NEXT: sbbl %eax, %eax 184; AVX1-NEXT: vzeroupper 185; AVX1-NEXT: retq 186; 187; AVX2-LABEL: test_v8f32_sext: 188; AVX2: # %bb.0: 189; AVX2-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 190; AVX2-NEXT: xorl %eax, %eax 191; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 192; AVX2-NEXT: vtestps %ymm1, %ymm0 193; AVX2-NEXT: sbbl %eax, %eax 194; AVX2-NEXT: vzeroupper 195; AVX2-NEXT: retq 196; 197; AVX512-LABEL: test_v8f32_sext: 198; AVX512: # %bb.0: 199; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 200; AVX512-NEXT: xorl %eax, %eax 201; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 202; AVX512-NEXT: vtestps %ymm1, %ymm0 203; AVX512-NEXT: sbbl %eax, %eax 204; AVX512-NEXT: vzeroupper 205; AVX512-NEXT: retq 206 %c = fcmp ogt <8 x float> %a0, %a1 207 %s = sext <8 x i1> %c to <8 x i32> 208 %1 = shufflevector <8 x i32> %s, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 209 %2 = and <8 x i32> %s, %1 210 %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 211 %4 = and <8 x i32> %2, %3 212 %5 = shufflevector <8 x i32> %4, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 213 %6 = and <8 x i32> %4, %5 214 %7 = extractelement <8 x i32> %6, i32 0 215 ret i32 %7 216} 217 218define i32 @test_v8f32_legal_sext(<8 x float> %a0, <8 x float> %a1) { 219; SSE-LABEL: test_v8f32_legal_sext: 220; SSE: # %bb.0: 221; SSE-NEXT: cmpltps %xmm1, %xmm3 222; SSE-NEXT: cmpltps %xmm0, %xmm2 223; SSE-NEXT: packssdw %xmm3, %xmm2 224; SSE-NEXT: pmovmskb %xmm2, %ecx 225; SSE-NEXT: xorl %eax, %eax 226; SSE-NEXT: cmpl $65535, %ecx # imm = 0xFFFF 227; SSE-NEXT: sete %al 228; SSE-NEXT: negl %eax 229; SSE-NEXT: retq 230; 231; AVX1OR2-LABEL: test_v8f32_legal_sext: 232; AVX1OR2: # %bb.0: 233; AVX1OR2-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 234; AVX1OR2-NEXT: vextractf128 $1, %ymm0, %xmm1 235; AVX1OR2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 236; AVX1OR2-NEXT: vpmovmskb %xmm0, %ecx 237; AVX1OR2-NEXT: xorl %eax, %eax 238; AVX1OR2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF 239; AVX1OR2-NEXT: sete %al 240; AVX1OR2-NEXT: negl %eax 241; AVX1OR2-NEXT: vzeroupper 242; AVX1OR2-NEXT: retq 243; 244; AVX512-LABEL: test_v8f32_legal_sext: 245; AVX512: # %bb.0: 246; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %k0 247; AVX512-NEXT: vpmovm2w %k0, %xmm0 248; AVX512-NEXT: vpmovmskb %xmm0, %ecx 249; AVX512-NEXT: xorl %eax, %eax 250; AVX512-NEXT: cmpl $65535, %ecx # imm = 0xFFFF 251; AVX512-NEXT: sete %al 252; AVX512-NEXT: negl %eax 253; AVX512-NEXT: vzeroupper 254; AVX512-NEXT: retq 255 %c = fcmp ogt <8 x float> %a0, %a1 256 %s = sext <8 x i1> %c to <8 x i16> 257 %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 258 %2 = and <8 x i16> %s, %1 259 %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 260 %4 = and <8 x i16> %2, %3 261 %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 262 %6 = and <8 x i16> %4, %5 263 %7 = extractelement <8 x i16> %6, i32 0 264 %8 = sext i16 %7 to i32 265 ret i32 %8 266} 267 268define i64 @test_v2i64_sext(<2 x i64> %a0, <2 x i64> %a1) { 269; SSE2-LABEL: test_v2i64_sext: 270; SSE2: # %bb.0: 271; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] 272; SSE2-NEXT: pxor %xmm2, %xmm1 273; SSE2-NEXT: pxor %xmm2, %xmm0 274; SSE2-NEXT: movdqa %xmm0, %xmm2 275; SSE2-NEXT: pcmpeqd %xmm1, %xmm2 276; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 277; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2] 278; SSE2-NEXT: pand %xmm2, %xmm1 279; SSE2-NEXT: por %xmm0, %xmm1 280; SSE2-NEXT: movmskpd %xmm1, %ecx 281; SSE2-NEXT: xorl %eax, %eax 282; SSE2-NEXT: cmpl $3, %ecx 283; SSE2-NEXT: sete %al 284; SSE2-NEXT: negq %rax 285; SSE2-NEXT: retq 286; 287; SSE42-LABEL: test_v2i64_sext: 288; SSE42: # %bb.0: 289; SSE42-NEXT: pcmpgtq %xmm1, %xmm0 290; SSE42-NEXT: movmskpd %xmm0, %ecx 291; SSE42-NEXT: xorl %eax, %eax 292; SSE42-NEXT: cmpl $3, %ecx 293; SSE42-NEXT: sete %al 294; SSE42-NEXT: negq %rax 295; SSE42-NEXT: retq 296; 297; AVX-LABEL: test_v2i64_sext: 298; AVX: # %bb.0: 299; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 300; AVX-NEXT: xorl %eax, %eax 301; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 302; AVX-NEXT: vtestpd %xmm1, %xmm0 303; AVX-NEXT: sbbq %rax, %rax 304; AVX-NEXT: retq 305 %c = icmp sgt <2 x i64> %a0, %a1 306 %s = sext <2 x i1> %c to <2 x i64> 307 %1 = shufflevector <2 x i64> %s, <2 x i64> undef, <2 x i32> <i32 1, i32 undef> 308 %2 = and <2 x i64> %s, %1 309 %3 = extractelement <2 x i64> %2, i32 0 310 ret i64 %3 311} 312 313define i64 @test_v4i64_sext(<4 x i64> %a0, <4 x i64> %a1) { 314; SSE2-LABEL: test_v4i64_sext: 315; SSE2: # %bb.0: 316; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648] 317; SSE2-NEXT: pxor %xmm4, %xmm3 318; SSE2-NEXT: pxor %xmm4, %xmm1 319; SSE2-NEXT: movdqa %xmm1, %xmm5 320; SSE2-NEXT: pcmpeqd %xmm3, %xmm5 321; SSE2-NEXT: pcmpgtd %xmm3, %xmm1 322; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2] 323; SSE2-NEXT: pand %xmm5, %xmm3 324; SSE2-NEXT: por %xmm1, %xmm3 325; SSE2-NEXT: pxor %xmm4, %xmm2 326; SSE2-NEXT: pxor %xmm4, %xmm0 327; SSE2-NEXT: movdqa %xmm0, %xmm1 328; SSE2-NEXT: pcmpeqd %xmm2, %xmm1 329; SSE2-NEXT: pcmpgtd %xmm2, %xmm0 330; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2] 331; SSE2-NEXT: pand %xmm1, %xmm2 332; SSE2-NEXT: por %xmm0, %xmm2 333; SSE2-NEXT: pand %xmm3, %xmm2 334; SSE2-NEXT: movmskpd %xmm2, %ecx 335; SSE2-NEXT: xorl %eax, %eax 336; SSE2-NEXT: cmpl $3, %ecx 337; SSE2-NEXT: sete %al 338; SSE2-NEXT: negq %rax 339; SSE2-NEXT: retq 340; 341; SSE42-LABEL: test_v4i64_sext: 342; SSE42: # %bb.0: 343; SSE42-NEXT: pcmpgtq %xmm3, %xmm1 344; SSE42-NEXT: pcmpgtq %xmm2, %xmm0 345; SSE42-NEXT: pand %xmm1, %xmm0 346; SSE42-NEXT: movmskpd %xmm0, %ecx 347; SSE42-NEXT: xorl %eax, %eax 348; SSE42-NEXT: cmpl $3, %ecx 349; SSE42-NEXT: sete %al 350; SSE42-NEXT: negq %rax 351; SSE42-NEXT: retq 352; 353; AVX1-LABEL: test_v4i64_sext: 354; AVX1: # %bb.0: 355; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 356; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 357; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 358; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 359; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 360; AVX1-NEXT: xorl %eax, %eax 361; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 362; AVX1-NEXT: vtestpd %xmm1, %xmm0 363; AVX1-NEXT: sbbq %rax, %rax 364; AVX1-NEXT: vzeroupper 365; AVX1-NEXT: retq 366; 367; AVX2-LABEL: test_v4i64_sext: 368; AVX2: # %bb.0: 369; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 370; AVX2-NEXT: xorl %eax, %eax 371; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 372; AVX2-NEXT: vtestpd %ymm1, %ymm0 373; AVX2-NEXT: sbbq %rax, %rax 374; AVX2-NEXT: vzeroupper 375; AVX2-NEXT: retq 376; 377; AVX512-LABEL: test_v4i64_sext: 378; AVX512: # %bb.0: 379; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 380; AVX512-NEXT: xorl %eax, %eax 381; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 382; AVX512-NEXT: vtestpd %ymm1, %ymm0 383; AVX512-NEXT: sbbq %rax, %rax 384; AVX512-NEXT: vzeroupper 385; AVX512-NEXT: retq 386 %c = icmp sgt <4 x i64> %a0, %a1 387 %s = sext <4 x i1> %c to <4 x i64> 388 %1 = shufflevector <4 x i64> %s, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 389 %2 = and <4 x i64> %s, %1 390 %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 391 %4 = and <4 x i64> %2, %3 392 %5 = extractelement <4 x i64> %4, i64 0 393 ret i64 %5 394} 395 396define i64 @test_v4i64_legal_sext(<4 x i64> %a0, <4 x i64> %a1) { 397; SSE2-LABEL: test_v4i64_legal_sext: 398; SSE2: # %bb.0: 399; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648] 400; SSE2-NEXT: pxor %xmm4, %xmm3 401; SSE2-NEXT: pxor %xmm4, %xmm1 402; SSE2-NEXT: movdqa %xmm1, %xmm5 403; SSE2-NEXT: pcmpgtd %xmm3, %xmm5 404; SSE2-NEXT: pxor %xmm4, %xmm2 405; SSE2-NEXT: pxor %xmm4, %xmm0 406; SSE2-NEXT: movdqa %xmm0, %xmm4 407; SSE2-NEXT: pcmpgtd %xmm2, %xmm4 408; SSE2-NEXT: movdqa %xmm4, %xmm6 409; SSE2-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,2],xmm5[0,2] 410; SSE2-NEXT: pcmpeqd %xmm3, %xmm1 411; SSE2-NEXT: pcmpeqd %xmm2, %xmm0 412; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3] 413; SSE2-NEXT: andps %xmm6, %xmm0 414; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm5[1,3] 415; SSE2-NEXT: orps %xmm0, %xmm4 416; SSE2-NEXT: movmskps %xmm4, %ecx 417; SSE2-NEXT: xorl %eax, %eax 418; SSE2-NEXT: cmpl $15, %ecx 419; SSE2-NEXT: sete %al 420; SSE2-NEXT: negq %rax 421; SSE2-NEXT: retq 422; 423; SSE42-LABEL: test_v4i64_legal_sext: 424; SSE42: # %bb.0: 425; SSE42-NEXT: pcmpgtq %xmm3, %xmm1 426; SSE42-NEXT: pcmpgtq %xmm2, %xmm0 427; SSE42-NEXT: packssdw %xmm1, %xmm0 428; SSE42-NEXT: movmskps %xmm0, %ecx 429; SSE42-NEXT: xorl %eax, %eax 430; SSE42-NEXT: cmpl $15, %ecx 431; SSE42-NEXT: sete %al 432; SSE42-NEXT: negq %rax 433; SSE42-NEXT: retq 434; 435; AVX1-LABEL: test_v4i64_legal_sext: 436; AVX1: # %bb.0: 437; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 438; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 439; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 440; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 441; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 442; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 443; AVX1-NEXT: xorl %eax, %eax 444; AVX1-NEXT: vtestps %xmm1, %xmm0 445; AVX1-NEXT: sbbq %rax, %rax 446; AVX1-NEXT: vzeroupper 447; AVX1-NEXT: retq 448; 449; AVX2-LABEL: test_v4i64_legal_sext: 450; AVX2: # %bb.0: 451; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 452; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 453; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 454; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 455; AVX2-NEXT: xorl %eax, %eax 456; AVX2-NEXT: vtestps %xmm1, %xmm0 457; AVX2-NEXT: sbbq %rax, %rax 458; AVX2-NEXT: vzeroupper 459; AVX2-NEXT: retq 460; 461; AVX512-LABEL: test_v4i64_legal_sext: 462; AVX512: # %bb.0: 463; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %k1 464; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 465; AVX512-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} {z} 466; AVX512-NEXT: xorl %eax, %eax 467; AVX512-NEXT: vtestps %xmm0, %xmm1 468; AVX512-NEXT: sbbq %rax, %rax 469; AVX512-NEXT: vzeroupper 470; AVX512-NEXT: retq 471 %c = icmp sgt <4 x i64> %a0, %a1 472 %s = sext <4 x i1> %c to <4 x i32> 473 %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 474 %2 = and <4 x i32> %s, %1 475 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 476 %4 = and <4 x i32> %2, %3 477 %5 = extractelement <4 x i32> %4, i64 0 478 %6 = sext i32 %5 to i64 479 ret i64 %6 480} 481 482define i32 @test_v4i32_sext(<4 x i32> %a0, <4 x i32> %a1) { 483; SSE-LABEL: test_v4i32_sext: 484; SSE: # %bb.0: 485; SSE-NEXT: pcmpgtd %xmm1, %xmm0 486; SSE-NEXT: movmskps %xmm0, %ecx 487; SSE-NEXT: xorl %eax, %eax 488; SSE-NEXT: cmpl $15, %ecx 489; SSE-NEXT: sete %al 490; SSE-NEXT: negl %eax 491; SSE-NEXT: retq 492; 493; AVX-LABEL: test_v4i32_sext: 494; AVX: # %bb.0: 495; AVX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 496; AVX-NEXT: xorl %eax, %eax 497; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 498; AVX-NEXT: vtestps %xmm1, %xmm0 499; AVX-NEXT: sbbl %eax, %eax 500; AVX-NEXT: retq 501 %c = icmp sgt <4 x i32> %a0, %a1 502 %s = sext <4 x i1> %c to <4 x i32> 503 %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 504 %2 = and <4 x i32> %s, %1 505 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 506 %4 = and <4 x i32> %2, %3 507 %5 = extractelement <4 x i32> %4, i32 0 508 ret i32 %5 509} 510 511define i32 @test_v8i32_sext(<8 x i32> %a0, <8 x i32> %a1) { 512; SSE-LABEL: test_v8i32_sext: 513; SSE: # %bb.0: 514; SSE-NEXT: pcmpgtd %xmm3, %xmm1 515; SSE-NEXT: pcmpgtd %xmm2, %xmm0 516; SSE-NEXT: pand %xmm1, %xmm0 517; SSE-NEXT: movmskps %xmm0, %ecx 518; SSE-NEXT: xorl %eax, %eax 519; SSE-NEXT: cmpl $15, %ecx 520; SSE-NEXT: sete %al 521; SSE-NEXT: negl %eax 522; SSE-NEXT: retq 523; 524; AVX1-LABEL: test_v8i32_sext: 525; AVX1: # %bb.0: 526; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 527; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 528; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2 529; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 530; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 531; AVX1-NEXT: xorl %eax, %eax 532; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 533; AVX1-NEXT: vtestps %xmm1, %xmm0 534; AVX1-NEXT: sbbl %eax, %eax 535; AVX1-NEXT: vzeroupper 536; AVX1-NEXT: retq 537; 538; AVX2-LABEL: test_v8i32_sext: 539; AVX2: # %bb.0: 540; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 541; AVX2-NEXT: xorl %eax, %eax 542; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 543; AVX2-NEXT: vtestps %ymm1, %ymm0 544; AVX2-NEXT: sbbl %eax, %eax 545; AVX2-NEXT: vzeroupper 546; AVX2-NEXT: retq 547; 548; AVX512-LABEL: test_v8i32_sext: 549; AVX512: # %bb.0: 550; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 551; AVX512-NEXT: xorl %eax, %eax 552; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 553; AVX512-NEXT: vtestps %ymm1, %ymm0 554; AVX512-NEXT: sbbl %eax, %eax 555; AVX512-NEXT: vzeroupper 556; AVX512-NEXT: retq 557 %c = icmp sgt <8 x i32> %a0, %a1 558 %s = sext <8 x i1> %c to <8 x i32> 559 %1 = shufflevector <8 x i32> %s, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 560 %2 = and <8 x i32> %s, %1 561 %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 562 %4 = and <8 x i32> %2, %3 563 %5 = shufflevector <8 x i32> %4, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 564 %6 = and <8 x i32> %4, %5 565 %7 = extractelement <8 x i32> %6, i32 0 566 ret i32 %7 567} 568 569define i32 @test_v8i32_legal_sext(<8 x i32> %a0, <8 x i32> %a1) { 570; SSE-LABEL: test_v8i32_legal_sext: 571; SSE: # %bb.0: 572; SSE-NEXT: pcmpgtd %xmm3, %xmm1 573; SSE-NEXT: pcmpgtd %xmm2, %xmm0 574; SSE-NEXT: packssdw %xmm1, %xmm0 575; SSE-NEXT: pmovmskb %xmm0, %ecx 576; SSE-NEXT: xorl %eax, %eax 577; SSE-NEXT: cmpl $65535, %ecx # imm = 0xFFFF 578; SSE-NEXT: sete %al 579; SSE-NEXT: negl %eax 580; SSE-NEXT: retq 581; 582; AVX1-LABEL: test_v8i32_legal_sext: 583; AVX1: # %bb.0: 584; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 585; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 586; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2 587; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 588; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 589; AVX1-NEXT: vpmovmskb %xmm0, %ecx 590; AVX1-NEXT: xorl %eax, %eax 591; AVX1-NEXT: cmpl $65535, %ecx # imm = 0xFFFF 592; AVX1-NEXT: sete %al 593; AVX1-NEXT: negl %eax 594; AVX1-NEXT: vzeroupper 595; AVX1-NEXT: retq 596; 597; AVX2-LABEL: test_v8i32_legal_sext: 598; AVX2: # %bb.0: 599; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 600; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 601; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 602; AVX2-NEXT: vpmovmskb %xmm0, %ecx 603; AVX2-NEXT: xorl %eax, %eax 604; AVX2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF 605; AVX2-NEXT: sete %al 606; AVX2-NEXT: negl %eax 607; AVX2-NEXT: vzeroupper 608; AVX2-NEXT: retq 609; 610; AVX512-LABEL: test_v8i32_legal_sext: 611; AVX512: # %bb.0: 612; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 613; AVX512-NEXT: vpmovm2w %k0, %xmm0 614; AVX512-NEXT: vpmovmskb %xmm0, %ecx 615; AVX512-NEXT: xorl %eax, %eax 616; AVX512-NEXT: cmpl $65535, %ecx # imm = 0xFFFF 617; AVX512-NEXT: sete %al 618; AVX512-NEXT: negl %eax 619; AVX512-NEXT: vzeroupper 620; AVX512-NEXT: retq 621 %c = icmp sgt <8 x i32> %a0, %a1 622 %s = sext <8 x i1> %c to <8 x i16> 623 %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 624 %2 = and <8 x i16> %s, %1 625 %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 626 %4 = and <8 x i16> %2, %3 627 %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 628 %6 = and <8 x i16> %4, %5 629 %7 = extractelement <8 x i16> %6, i32 0 630 %8 = sext i16 %7 to i32 631 ret i32 %8 632} 633 634define i16 @test_v8i16_sext(<8 x i16> %a0, <8 x i16> %a1) { 635; SSE-LABEL: test_v8i16_sext: 636; SSE: # %bb.0: 637; SSE-NEXT: pcmpgtw %xmm1, %xmm0 638; SSE-NEXT: pmovmskb %xmm0, %ecx 639; SSE-NEXT: xorl %eax, %eax 640; SSE-NEXT: cmpl $65535, %ecx # imm = 0xFFFF 641; SSE-NEXT: sete %al 642; SSE-NEXT: negl %eax 643; SSE-NEXT: # kill: def $ax killed $ax killed $eax 644; SSE-NEXT: retq 645; 646; AVX-LABEL: test_v8i16_sext: 647; AVX: # %bb.0: 648; AVX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 649; AVX-NEXT: vpmovmskb %xmm0, %ecx 650; AVX-NEXT: xorl %eax, %eax 651; AVX-NEXT: cmpl $65535, %ecx # imm = 0xFFFF 652; AVX-NEXT: sete %al 653; AVX-NEXT: negl %eax 654; AVX-NEXT: # kill: def $ax killed $ax killed $eax 655; AVX-NEXT: retq 656 %c = icmp sgt <8 x i16> %a0, %a1 657 %s = sext <8 x i1> %c to <8 x i16> 658 %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 659 %2 = and <8 x i16> %s, %1 660 %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 661 %4 = and <8 x i16> %2, %3 662 %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 663 %6 = and <8 x i16> %4, %5 664 %7 = extractelement <8 x i16> %6, i32 0 665 ret i16 %7 666} 667 668define i16 @test_v16i16_sext(<16 x i16> %a0, <16 x i16> %a1) { 669; SSE-LABEL: test_v16i16_sext: 670; SSE: # %bb.0: 671; SSE-NEXT: pcmpgtw %xmm3, %xmm1 672; SSE-NEXT: pcmpgtw %xmm2, %xmm0 673; SSE-NEXT: pand %xmm1, %xmm0 674; SSE-NEXT: pmovmskb %xmm0, %ecx 675; SSE-NEXT: xorl %eax, %eax 676; SSE-NEXT: cmpl $65535, %ecx # imm = 0xFFFF 677; SSE-NEXT: sete %al 678; SSE-NEXT: negl %eax 679; SSE-NEXT: # kill: def $ax killed $ax killed $eax 680; SSE-NEXT: retq 681; 682; AVX1-LABEL: test_v16i16_sext: 683; AVX1: # %bb.0: 684; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 685; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 686; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2 687; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 688; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 689; AVX1-NEXT: vpmovmskb %xmm0, %ecx 690; AVX1-NEXT: xorl %eax, %eax 691; AVX1-NEXT: cmpl $65535, %ecx # imm = 0xFFFF 692; AVX1-NEXT: sete %al 693; AVX1-NEXT: negl %eax 694; AVX1-NEXT: # kill: def $ax killed $ax killed $eax 695; AVX1-NEXT: vzeroupper 696; AVX1-NEXT: retq 697; 698; AVX2-LABEL: test_v16i16_sext: 699; AVX2: # %bb.0: 700; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 701; AVX2-NEXT: vpmovmskb %ymm0, %ecx 702; AVX2-NEXT: xorl %eax, %eax 703; AVX2-NEXT: cmpl $-1, %ecx 704; AVX2-NEXT: sete %al 705; AVX2-NEXT: negl %eax 706; AVX2-NEXT: # kill: def $ax killed $ax killed $eax 707; AVX2-NEXT: vzeroupper 708; AVX2-NEXT: retq 709; 710; AVX512-LABEL: test_v16i16_sext: 711; AVX512: # %bb.0: 712; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 713; AVX512-NEXT: vpmovmskb %ymm0, %ecx 714; AVX512-NEXT: xorl %eax, %eax 715; AVX512-NEXT: cmpl $-1, %ecx 716; AVX512-NEXT: sete %al 717; AVX512-NEXT: negl %eax 718; AVX512-NEXT: # kill: def $ax killed $ax killed $eax 719; AVX512-NEXT: vzeroupper 720; AVX512-NEXT: retq 721 %c = icmp sgt <16 x i16> %a0, %a1 722 %s = sext <16 x i1> %c to <16 x i16> 723 %1 = shufflevector <16 x i16> %s, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 724 %2 = and <16 x i16> %s, %1 725 %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 726 %4 = and <16 x i16> %2, %3 727 %5 = shufflevector <16 x i16> %4, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 728 %6 = and <16 x i16> %4, %5 729 %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 730 %8 = and <16 x i16> %6, %7 731 %9 = extractelement <16 x i16> %8, i32 0 732 ret i16 %9 733} 734 735define i16 @test_v16i16_legal_sext(<16 x i16> %a0, <16 x i16> %a1) { 736; SSE-LABEL: test_v16i16_legal_sext: 737; SSE: # %bb.0: 738; SSE-NEXT: pcmpgtw %xmm3, %xmm1 739; SSE-NEXT: pcmpgtw %xmm2, %xmm0 740; SSE-NEXT: packsswb %xmm1, %xmm0 741; SSE-NEXT: pmovmskb %xmm0, %eax 742; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF 743; SSE-NEXT: sete %al 744; SSE-NEXT: negb %al 745; SSE-NEXT: movsbl %al, %eax 746; SSE-NEXT: # kill: def $ax killed $ax killed $eax 747; SSE-NEXT: retq 748; 749; AVX1-LABEL: test_v16i16_legal_sext: 750; AVX1: # %bb.0: 751; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 752; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 753; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2 754; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 755; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0 756; AVX1-NEXT: vpmovmskb %xmm0, %eax 757; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF 758; AVX1-NEXT: sete %al 759; AVX1-NEXT: negb %al 760; AVX1-NEXT: movsbl %al, %eax 761; AVX1-NEXT: # kill: def $ax killed $ax killed $eax 762; AVX1-NEXT: vzeroupper 763; AVX1-NEXT: retq 764; 765; AVX2-LABEL: test_v16i16_legal_sext: 766; AVX2: # %bb.0: 767; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 768; AVX2-NEXT: vpmovmskb %ymm0, %eax 769; AVX2-NEXT: cmpl $-1, %eax 770; AVX2-NEXT: sete %al 771; AVX2-NEXT: negb %al 772; AVX2-NEXT: movsbl %al, %eax 773; AVX2-NEXT: # kill: def $ax killed $ax killed $eax 774; AVX2-NEXT: vzeroupper 775; AVX2-NEXT: retq 776; 777; AVX512-LABEL: test_v16i16_legal_sext: 778; AVX512: # %bb.0: 779; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 780; AVX512-NEXT: vpmovm2b %k0, %xmm0 781; AVX512-NEXT: vpmovmskb %xmm0, %eax 782; AVX512-NEXT: cmpl $65535, %eax # imm = 0xFFFF 783; AVX512-NEXT: sete %al 784; AVX512-NEXT: negb %al 785; AVX512-NEXT: movsbl %al, %eax 786; AVX512-NEXT: # kill: def $ax killed $ax killed $eax 787; AVX512-NEXT: vzeroupper 788; AVX512-NEXT: retq 789 %c = icmp sgt <16 x i16> %a0, %a1 790 %s = sext <16 x i1> %c to <16 x i8> 791 %1 = shufflevector <16 x i8> %s, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 792 %2 = and <16 x i8> %s, %1 793 %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 794 %4 = and <16 x i8> %2, %3 795 %5 = shufflevector <16 x i8> %4, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 796 %6 = and <16 x i8> %4, %5 797 %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 798 %8 = and <16 x i8> %6, %7 799 %9 = extractelement <16 x i8> %8, i32 0 800 %10 = sext i8 %9 to i16 801 ret i16 %10 802} 803 804define i8 @test_v16i8_sext(<16 x i8> %a0, <16 x i8> %a1) { 805; SSE-LABEL: test_v16i8_sext: 806; SSE: # %bb.0: 807; SSE-NEXT: pcmpgtb %xmm1, %xmm0 808; SSE-NEXT: pmovmskb %xmm0, %eax 809; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF 810; SSE-NEXT: sete %al 811; SSE-NEXT: negb %al 812; SSE-NEXT: retq 813; 814; AVX-LABEL: test_v16i8_sext: 815; AVX: # %bb.0: 816; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 817; AVX-NEXT: vpmovmskb %xmm0, %eax 818; AVX-NEXT: cmpl $65535, %eax # imm = 0xFFFF 819; AVX-NEXT: sete %al 820; AVX-NEXT: negb %al 821; AVX-NEXT: retq 822 %c = icmp sgt <16 x i8> %a0, %a1 823 %s = sext <16 x i1> %c to <16 x i8> 824 %1 = shufflevector <16 x i8> %s, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 825 %2 = and <16 x i8> %s, %1 826 %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 827 %4 = and <16 x i8> %2, %3 828 %5 = shufflevector <16 x i8> %4, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 829 %6 = and <16 x i8> %4, %5 830 %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 831 %8 = and <16 x i8> %6, %7 832 %9 = extractelement <16 x i8> %8, i32 0 833 ret i8 %9 834} 835 836define i8 @test_v32i8_sext(<32 x i8> %a0, <32 x i8> %a1) { 837; SSE-LABEL: test_v32i8_sext: 838; SSE: # %bb.0: 839; SSE-NEXT: pcmpgtb %xmm3, %xmm1 840; SSE-NEXT: pcmpgtb %xmm2, %xmm0 841; SSE-NEXT: pand %xmm1, %xmm0 842; SSE-NEXT: pmovmskb %xmm0, %eax 843; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF 844; SSE-NEXT: sete %al 845; SSE-NEXT: negb %al 846; SSE-NEXT: retq 847; 848; AVX1-LABEL: test_v32i8_sext: 849; AVX1: # %bb.0: 850; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 851; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 852; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2 853; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 854; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 855; AVX1-NEXT: vpmovmskb %xmm0, %eax 856; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF 857; AVX1-NEXT: sete %al 858; AVX1-NEXT: negb %al 859; AVX1-NEXT: vzeroupper 860; AVX1-NEXT: retq 861; 862; AVX2-LABEL: test_v32i8_sext: 863; AVX2: # %bb.0: 864; AVX2-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 865; AVX2-NEXT: vpmovmskb %ymm0, %eax 866; AVX2-NEXT: cmpl $-1, %eax 867; AVX2-NEXT: sete %al 868; AVX2-NEXT: negb %al 869; AVX2-NEXT: vzeroupper 870; AVX2-NEXT: retq 871; 872; AVX512-LABEL: test_v32i8_sext: 873; AVX512: # %bb.0: 874; AVX512-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 875; AVX512-NEXT: vpmovmskb %ymm0, %eax 876; AVX512-NEXT: cmpl $-1, %eax 877; AVX512-NEXT: sete %al 878; AVX512-NEXT: negb %al 879; AVX512-NEXT: vzeroupper 880; AVX512-NEXT: retq 881 %c = icmp sgt <32 x i8> %a0, %a1 882 %s = sext <32 x i1> %c to <32 x i8> 883 %1 = shufflevector <32 x i8> %s, <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 884 %2 = and <32 x i8> %s, %1 885 %3 = shufflevector <32 x i8> %2, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 886 %4 = and <32 x i8> %2, %3 887 %5 = shufflevector <32 x i8> %4, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 888 %6 = and <32 x i8> %4, %5 889 %7 = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 890 %8 = and <32 x i8> %6, %7 891 %9 = shufflevector <32 x i8> %8, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 892 %10 = and <32 x i8> %8, %9 893 %11 = extractelement <32 x i8> %10, i32 0 894 ret i8 %11 895} 896 897; Should not "MOVMSK(PCMPEQ(..)) -> PTESTZ(..)" when cmp result has muti-uses. 898define i32 @test_v32i8_muti_uses(<32 x i8> %x, <32 x i8>%y, i32 %z) { 899; SSE-LABEL: test_v32i8_muti_uses: 900; SSE: # %bb.0: 901; SSE-NEXT: pcmpeqb %xmm2, %xmm0 902; SSE-NEXT: pmovmskb %xmm0, %eax 903; SSE-NEXT: pcmpeqb %xmm3, %xmm1 904; SSE-NEXT: pmovmskb %xmm1, %ecx 905; SSE-NEXT: shll $16, %ecx 906; SSE-NEXT: orl %eax, %ecx 907; SSE-NEXT: cmpl $-1, %ecx 908; SSE-NEXT: movl $16, %eax 909; SSE-NEXT: cmovnel %ecx, %eax 910; SSE-NEXT: retq 911; 912; AVX1-LABEL: test_v32i8_muti_uses: 913; AVX1: # %bb.0: 914; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm2 915; AVX1-NEXT: vpmovmskb %xmm2, %eax 916; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 917; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 918; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 919; AVX1-NEXT: vpmovmskb %xmm0, %ecx 920; AVX1-NEXT: shll $16, %ecx 921; AVX1-NEXT: orl %eax, %ecx 922; AVX1-NEXT: cmpl $-1, %ecx 923; AVX1-NEXT: movl $16, %eax 924; AVX1-NEXT: cmovnel %ecx, %eax 925; AVX1-NEXT: vzeroupper 926; AVX1-NEXT: retq 927; 928; AVX2-LABEL: test_v32i8_muti_uses: 929; AVX2: # %bb.0: 930; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 931; AVX2-NEXT: vpmovmskb %ymm0, %ecx 932; AVX2-NEXT: cmpl $-1, %ecx 933; AVX2-NEXT: movl $16, %eax 934; AVX2-NEXT: cmovnel %ecx, %eax 935; AVX2-NEXT: vzeroupper 936; AVX2-NEXT: retq 937; 938; AVX512-LABEL: test_v32i8_muti_uses: 939; AVX512: # %bb.0: 940; AVX512-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 941; AVX512-NEXT: kortestd %k0, %k0 942; AVX512-NEXT: kmovd %k0, %ecx 943; AVX512-NEXT: movl $16, %eax 944; AVX512-NEXT: cmovael %ecx, %eax 945; AVX512-NEXT: vzeroupper 946; AVX512-NEXT: retq 947 %a = icmp eq <32 x i8> %x, %y 948 %b = bitcast <32 x i1> %a to i32 949 %c = icmp eq i32 %b, -1 950 %res = select i1 %c, i32 16, i32 %b 951 ret i32 %res 952} 953 954define i1 @bool_reduction_v2f64(<2 x double> %x, <2 x double> %y) { 955; SSE-LABEL: bool_reduction_v2f64: 956; SSE: # %bb.0: 957; SSE-NEXT: cmpltpd %xmm0, %xmm1 958; SSE-NEXT: movmskpd %xmm1, %eax 959; SSE-NEXT: cmpl $3, %eax 960; SSE-NEXT: sete %al 961; SSE-NEXT: retq 962; 963; AVX1OR2-LABEL: bool_reduction_v2f64: 964; AVX1OR2: # %bb.0: 965; AVX1OR2-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 966; AVX1OR2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 967; AVX1OR2-NEXT: vtestpd %xmm1, %xmm0 968; AVX1OR2-NEXT: setb %al 969; AVX1OR2-NEXT: retq 970; 971; AVX512-LABEL: bool_reduction_v2f64: 972; AVX512: # %bb.0: 973; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %k0 974; AVX512-NEXT: kmovd %k0, %eax 975; AVX512-NEXT: cmpb $3, %al 976; AVX512-NEXT: sete %al 977; AVX512-NEXT: retq 978 %a = fcmp ogt <2 x double> %x, %y 979 %b = shufflevector <2 x i1> %a, <2 x i1> undef, <2 x i32> <i32 1, i32 undef> 980 %c = and <2 x i1> %a, %b 981 %d = extractelement <2 x i1> %c, i32 0 982 ret i1 %d 983} 984 985define i1 @bool_reduction_v4f32(<4 x float> %x, <4 x float> %y) { 986; SSE-LABEL: bool_reduction_v4f32: 987; SSE: # %bb.0: 988; SSE-NEXT: cmpeqps %xmm1, %xmm0 989; SSE-NEXT: movmskps %xmm0, %eax 990; SSE-NEXT: cmpl $15, %eax 991; SSE-NEXT: sete %al 992; SSE-NEXT: retq 993; 994; AVX1OR2-LABEL: bool_reduction_v4f32: 995; AVX1OR2: # %bb.0: 996; AVX1OR2-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 997; AVX1OR2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 998; AVX1OR2-NEXT: vtestps %xmm1, %xmm0 999; AVX1OR2-NEXT: setb %al 1000; AVX1OR2-NEXT: retq 1001; 1002; AVX512-LABEL: bool_reduction_v4f32: 1003; AVX512: # %bb.0: 1004; AVX512-NEXT: vcmpeqps %xmm1, %xmm0, %k0 1005; AVX512-NEXT: kmovd %k0, %eax 1006; AVX512-NEXT: cmpb $15, %al 1007; AVX512-NEXT: sete %al 1008; AVX512-NEXT: retq 1009 %a = fcmp oeq <4 x float> %x, %y 1010 %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 1011 %b = and <4 x i1> %s1, %a 1012 %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 1013 %c = and <4 x i1> %s2, %b 1014 %d = extractelement <4 x i1> %c, i32 0 1015 ret i1 %d 1016} 1017 1018define i1 @bool_reduction_v4f64(<4 x double> %x, <4 x double> %y) { 1019; SSE-LABEL: bool_reduction_v4f64: 1020; SSE: # %bb.0: 1021; SSE-NEXT: cmplepd %xmm1, %xmm3 1022; SSE-NEXT: cmplepd %xmm0, %xmm2 1023; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2] 1024; SSE-NEXT: movmskps %xmm2, %eax 1025; SSE-NEXT: cmpl $15, %eax 1026; SSE-NEXT: sete %al 1027; SSE-NEXT: retq 1028; 1029; AVX1-LABEL: bool_reduction_v4f64: 1030; AVX1: # %bb.0: 1031; AVX1-NEXT: vcmplepd %ymm0, %ymm1, %ymm0 1032; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1 1033; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1 1034; AVX1-NEXT: vtestpd %ymm1, %ymm0 1035; AVX1-NEXT: setb %al 1036; AVX1-NEXT: vzeroupper 1037; AVX1-NEXT: retq 1038; 1039; AVX2-LABEL: bool_reduction_v4f64: 1040; AVX2: # %bb.0: 1041; AVX2-NEXT: vcmplepd %ymm0, %ymm1, %ymm0 1042; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 1043; AVX2-NEXT: vtestpd %ymm1, %ymm0 1044; AVX2-NEXT: setb %al 1045; AVX2-NEXT: vzeroupper 1046; AVX2-NEXT: retq 1047; 1048; AVX512-LABEL: bool_reduction_v4f64: 1049; AVX512: # %bb.0: 1050; AVX512-NEXT: vcmplepd %ymm0, %ymm1, %k0 1051; AVX512-NEXT: kmovd %k0, %eax 1052; AVX512-NEXT: cmpb $15, %al 1053; AVX512-NEXT: sete %al 1054; AVX512-NEXT: vzeroupper 1055; AVX512-NEXT: retq 1056 %a = fcmp oge <4 x double> %x, %y 1057 %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 1058 %b = and <4 x i1> %s1, %a 1059 %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 1060 %c = and <4 x i1> %s2, %b 1061 %d = extractelement <4 x i1> %c, i32 0 1062 ret i1 %d 1063} 1064 1065define i1 @bool_reduction_v8f32(<8 x float> %x, <8 x float> %y) { 1066; SSE-LABEL: bool_reduction_v8f32: 1067; SSE: # %bb.0: 1068; SSE-NEXT: cmpneqps %xmm3, %xmm1 1069; SSE-NEXT: cmpneqps %xmm2, %xmm0 1070; SSE-NEXT: packssdw %xmm1, %xmm0 1071; SSE-NEXT: packsswb %xmm0, %xmm0 1072; SSE-NEXT: pmovmskb %xmm0, %eax 1073; SSE-NEXT: cmpb $-1, %al 1074; SSE-NEXT: sete %al 1075; SSE-NEXT: retq 1076; 1077; AVX1-LABEL: bool_reduction_v8f32: 1078; AVX1: # %bb.0: 1079; AVX1-NEXT: vcmpneqps %ymm1, %ymm0, %ymm0 1080; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 1081; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1 1082; AVX1-NEXT: vtestps %ymm1, %ymm0 1083; AVX1-NEXT: setb %al 1084; AVX1-NEXT: vzeroupper 1085; AVX1-NEXT: retq 1086; 1087; AVX2-LABEL: bool_reduction_v8f32: 1088; AVX2: # %bb.0: 1089; AVX2-NEXT: vcmpneqps %ymm1, %ymm0, %ymm0 1090; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 1091; AVX2-NEXT: vtestps %ymm1, %ymm0 1092; AVX2-NEXT: setb %al 1093; AVX2-NEXT: vzeroupper 1094; AVX2-NEXT: retq 1095; 1096; AVX512-LABEL: bool_reduction_v8f32: 1097; AVX512: # %bb.0: 1098; AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k0 1099; AVX512-NEXT: kmovd %k0, %eax 1100; AVX512-NEXT: cmpb $-1, %al 1101; AVX512-NEXT: sete %al 1102; AVX512-NEXT: vzeroupper 1103; AVX512-NEXT: retq 1104 %a = fcmp une <8 x float> %x, %y 1105 %s1 = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 1106 %b = and <8 x i1> %s1, %a 1107 %s2 = shufflevector <8 x i1> %b, <8 x i1> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1108 %c = and <8 x i1> %s2, %b 1109 %s3 = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1110 %d = and <8 x i1> %s3, %c 1111 %e = extractelement <8 x i1> %d, i32 0 1112 ret i1 %e 1113} 1114 1115define i1 @bool_reduction_v2i64(<2 x i64> %x, <2 x i64> %y) { 1116; SSE2-LABEL: bool_reduction_v2i64: 1117; SSE2: # %bb.0: 1118; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 1119; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] 1120; SSE2-NEXT: pand %xmm0, %xmm1 1121; SSE2-NEXT: movmskpd %xmm1, %eax 1122; SSE2-NEXT: testl %eax, %eax 1123; SSE2-NEXT: sete %al 1124; SSE2-NEXT: retq 1125; 1126; SSE42-LABEL: bool_reduction_v2i64: 1127; SSE42: # %bb.0: 1128; SSE42-NEXT: pcmpeqq %xmm1, %xmm0 1129; SSE42-NEXT: movmskpd %xmm0, %eax 1130; SSE42-NEXT: testl %eax, %eax 1131; SSE42-NEXT: sete %al 1132; SSE42-NEXT: retq 1133; 1134; AVX1OR2-LABEL: bool_reduction_v2i64: 1135; AVX1OR2: # %bb.0: 1136; AVX1OR2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 1137; AVX1OR2-NEXT: vtestpd %xmm0, %xmm0 1138; AVX1OR2-NEXT: sete %al 1139; AVX1OR2-NEXT: retq 1140; 1141; AVX512-LABEL: bool_reduction_v2i64: 1142; AVX512: # %bb.0: 1143; AVX512-NEXT: vpcmpneqq %xmm1, %xmm0, %k0 1144; AVX512-NEXT: kmovd %k0, %eax 1145; AVX512-NEXT: cmpb $3, %al 1146; AVX512-NEXT: sete %al 1147; AVX512-NEXT: retq 1148 %a = icmp ne <2 x i64> %x, %y 1149 %b = shufflevector <2 x i1> %a, <2 x i1> undef, <2 x i32> <i32 1, i32 undef> 1150 %c = and <2 x i1> %a, %b 1151 %d = extractelement <2 x i1> %c, i32 0 1152 ret i1 %d 1153} 1154 1155define i1 @bool_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) { 1156; SSE2-LABEL: bool_reduction_v4i32: 1157; SSE2: # %bb.0: 1158; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] 1159; SSE2-NEXT: pxor %xmm2, %xmm1 1160; SSE2-NEXT: pxor %xmm2, %xmm0 1161; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 1162; SSE2-NEXT: movmskps %xmm0, %eax 1163; SSE2-NEXT: cmpl $15, %eax 1164; SSE2-NEXT: sete %al 1165; SSE2-NEXT: retq 1166; 1167; SSE42-LABEL: bool_reduction_v4i32: 1168; SSE42: # %bb.0: 1169; SSE42-NEXT: pminud %xmm0, %xmm1 1170; SSE42-NEXT: pcmpeqd %xmm0, %xmm1 1171; SSE42-NEXT: movmskps %xmm1, %eax 1172; SSE42-NEXT: testl %eax, %eax 1173; SSE42-NEXT: sete %al 1174; SSE42-NEXT: retq 1175; 1176; AVX1OR2-LABEL: bool_reduction_v4i32: 1177; AVX1OR2: # %bb.0: 1178; AVX1OR2-NEXT: vpminud %xmm1, %xmm0, %xmm1 1179; AVX1OR2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1180; AVX1OR2-NEXT: vtestps %xmm0, %xmm0 1181; AVX1OR2-NEXT: sete %al 1182; AVX1OR2-NEXT: retq 1183; 1184; AVX512-LABEL: bool_reduction_v4i32: 1185; AVX512: # %bb.0: 1186; AVX512-NEXT: vpcmpnleud %xmm1, %xmm0, %k0 1187; AVX512-NEXT: kmovd %k0, %eax 1188; AVX512-NEXT: cmpb $15, %al 1189; AVX512-NEXT: sete %al 1190; AVX512-NEXT: retq 1191 %a = icmp ugt <4 x i32> %x, %y 1192 %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 1193 %b = and <4 x i1> %s1, %a 1194 %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 1195 %c = and <4 x i1> %s2, %b 1196 %d = extractelement <4 x i1> %c, i32 0 1197 ret i1 %d 1198} 1199 1200define i1 @bool_reduction_v8i16(<8 x i16> %x, <8 x i16> %y) { 1201; SSE-LABEL: bool_reduction_v8i16: 1202; SSE: # %bb.0: 1203; SSE-NEXT: pcmpgtw %xmm0, %xmm1 1204; SSE-NEXT: packsswb %xmm1, %xmm1 1205; SSE-NEXT: pmovmskb %xmm1, %eax 1206; SSE-NEXT: cmpb $-1, %al 1207; SSE-NEXT: sete %al 1208; SSE-NEXT: retq 1209; 1210; AVX1OR2-LABEL: bool_reduction_v8i16: 1211; AVX1OR2: # %bb.0: 1212; AVX1OR2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 1213; AVX1OR2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 1214; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax 1215; AVX1OR2-NEXT: cmpb $-1, %al 1216; AVX1OR2-NEXT: sete %al 1217; AVX1OR2-NEXT: retq 1218; 1219; AVX512-LABEL: bool_reduction_v8i16: 1220; AVX512: # %bb.0: 1221; AVX512-NEXT: vpcmpgtw %xmm0, %xmm1, %k0 1222; AVX512-NEXT: kmovd %k0, %eax 1223; AVX512-NEXT: cmpb $-1, %al 1224; AVX512-NEXT: sete %al 1225; AVX512-NEXT: retq 1226 %a = icmp slt <8 x i16> %x, %y 1227 %s1 = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 1228 %b = and <8 x i1> %s1, %a 1229 %s2 = shufflevector <8 x i1> %b, <8 x i1> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1230 %c = and <8 x i1> %s2, %b 1231 %s3 = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1232 %d = and <8 x i1> %s3, %c 1233 %e = extractelement <8 x i1> %d, i32 0 1234 ret i1 %e 1235} 1236 1237define i1 @bool_reduction_v16i8(<16 x i8> %x, <16 x i8> %y) { 1238; SSE-LABEL: bool_reduction_v16i8: 1239; SSE: # %bb.0: 1240; SSE-NEXT: pcmpgtb %xmm1, %xmm0 1241; SSE-NEXT: pmovmskb %xmm0, %eax 1242; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF 1243; SSE-NEXT: sete %al 1244; SSE-NEXT: retq 1245; 1246; AVX1OR2-LABEL: bool_reduction_v16i8: 1247; AVX1OR2: # %bb.0: 1248; AVX1OR2-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 1249; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax 1250; AVX1OR2-NEXT: cmpl $65535, %eax # imm = 0xFFFF 1251; AVX1OR2-NEXT: sete %al 1252; AVX1OR2-NEXT: retq 1253; 1254; AVX512-LABEL: bool_reduction_v16i8: 1255; AVX512: # %bb.0: 1256; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 1257; AVX512-NEXT: kortestw %k0, %k0 1258; AVX512-NEXT: setb %al 1259; AVX512-NEXT: retq 1260 %a = icmp sgt <16 x i8> %x, %y 1261 %s1 = shufflevector <16 x i1> %a, <16 x i1> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1262 %b = and <16 x i1> %s1, %a 1263 %s2 = shufflevector <16 x i1> %b, <16 x i1> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1264 %c = and <16 x i1> %s2, %b 1265 %s3 = shufflevector <16 x i1> %c, <16 x i1> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1266 %d = and <16 x i1> %s3, %c 1267 %s4 = shufflevector <16 x i1> %d, <16 x i1> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1268 %e = and <16 x i1> %s4, %d 1269 %f = extractelement <16 x i1> %e, i32 0 1270 ret i1 %f 1271} 1272 1273define i1 @bool_reduction_v4i64(<4 x i64> %x, <4 x i64> %y) { 1274; SSE2-LABEL: bool_reduction_v4i64: 1275; SSE2: # %bb.0: 1276; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648] 1277; SSE2-NEXT: pxor %xmm4, %xmm1 1278; SSE2-NEXT: pxor %xmm4, %xmm3 1279; SSE2-NEXT: movdqa %xmm3, %xmm5 1280; SSE2-NEXT: pcmpgtd %xmm1, %xmm5 1281; SSE2-NEXT: pxor %xmm4, %xmm0 1282; SSE2-NEXT: pxor %xmm4, %xmm2 1283; SSE2-NEXT: movdqa %xmm2, %xmm4 1284; SSE2-NEXT: pcmpgtd %xmm0, %xmm4 1285; SSE2-NEXT: movdqa %xmm4, %xmm6 1286; SSE2-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,2],xmm5[0,2] 1287; SSE2-NEXT: pcmpeqd %xmm1, %xmm3 1288; SSE2-NEXT: pcmpeqd %xmm0, %xmm2 1289; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,3],xmm3[1,3] 1290; SSE2-NEXT: andps %xmm6, %xmm2 1291; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm5[1,3] 1292; SSE2-NEXT: orps %xmm2, %xmm4 1293; SSE2-NEXT: movmskps %xmm4, %eax 1294; SSE2-NEXT: cmpl $15, %eax 1295; SSE2-NEXT: sete %al 1296; SSE2-NEXT: retq 1297; 1298; SSE42-LABEL: bool_reduction_v4i64: 1299; SSE42: # %bb.0: 1300; SSE42-NEXT: pcmpgtq %xmm1, %xmm3 1301; SSE42-NEXT: pcmpgtq %xmm0, %xmm2 1302; SSE42-NEXT: packssdw %xmm3, %xmm2 1303; SSE42-NEXT: movmskps %xmm2, %eax 1304; SSE42-NEXT: cmpl $15, %eax 1305; SSE42-NEXT: sete %al 1306; SSE42-NEXT: retq 1307; 1308; AVX1-LABEL: bool_reduction_v4i64: 1309; AVX1: # %bb.0: 1310; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1311; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 1312; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 1313; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 1314; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 1315; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 1316; AVX1-NEXT: vtestpd %xmm1, %xmm0 1317; AVX1-NEXT: setb %al 1318; AVX1-NEXT: vzeroupper 1319; AVX1-NEXT: retq 1320; 1321; AVX2-LABEL: bool_reduction_v4i64: 1322; AVX2: # %bb.0: 1323; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 1324; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 1325; AVX2-NEXT: vtestpd %ymm1, %ymm0 1326; AVX2-NEXT: setb %al 1327; AVX2-NEXT: vzeroupper 1328; AVX2-NEXT: retq 1329; 1330; AVX512-LABEL: bool_reduction_v4i64: 1331; AVX512: # %bb.0: 1332; AVX512-NEXT: vpcmpgtq %ymm0, %ymm1, %k0 1333; AVX512-NEXT: kmovd %k0, %eax 1334; AVX512-NEXT: cmpb $15, %al 1335; AVX512-NEXT: sete %al 1336; AVX512-NEXT: vzeroupper 1337; AVX512-NEXT: retq 1338 %a = icmp slt <4 x i64> %x, %y 1339 %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 1340 %b = and <4 x i1> %s1, %a 1341 %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 1342 %c = and <4 x i1> %s2, %b 1343 %d = extractelement <4 x i1> %c, i32 0 1344 ret i1 %d 1345} 1346 1347define i1 @bool_reduction_v8i32(<8 x i32> %x, <8 x i32> %y) { 1348; SSE2-LABEL: bool_reduction_v8i32: 1349; SSE2: # %bb.0: 1350; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648] 1351; SSE2-NEXT: pxor %xmm4, %xmm3 1352; SSE2-NEXT: pxor %xmm4, %xmm1 1353; SSE2-NEXT: pcmpgtd %xmm3, %xmm1 1354; SSE2-NEXT: pxor %xmm4, %xmm2 1355; SSE2-NEXT: pxor %xmm4, %xmm0 1356; SSE2-NEXT: pcmpgtd %xmm2, %xmm0 1357; SSE2-NEXT: packssdw %xmm1, %xmm0 1358; SSE2-NEXT: packsswb %xmm0, %xmm0 1359; SSE2-NEXT: pmovmskb %xmm0, %eax 1360; SSE2-NEXT: notl %eax 1361; SSE2-NEXT: cmpb $-1, %al 1362; SSE2-NEXT: sete %al 1363; SSE2-NEXT: retq 1364; 1365; SSE42-LABEL: bool_reduction_v8i32: 1366; SSE42: # %bb.0: 1367; SSE42-NEXT: pminud %xmm1, %xmm3 1368; SSE42-NEXT: pcmpeqd %xmm1, %xmm3 1369; SSE42-NEXT: pminud %xmm0, %xmm2 1370; SSE42-NEXT: pcmpeqd %xmm0, %xmm2 1371; SSE42-NEXT: packssdw %xmm3, %xmm2 1372; SSE42-NEXT: packsswb %xmm2, %xmm2 1373; SSE42-NEXT: pmovmskb %xmm2, %eax 1374; SSE42-NEXT: cmpb $-1, %al 1375; SSE42-NEXT: sete %al 1376; SSE42-NEXT: retq 1377; 1378; AVX1-LABEL: bool_reduction_v8i32: 1379; AVX1: # %bb.0: 1380; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1381; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1382; AVX1-NEXT: vpminud %xmm2, %xmm3, %xmm2 1383; AVX1-NEXT: vpxor %xmm2, %xmm3, %xmm2 1384; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm1 1385; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 1386; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 1387; AVX1-NEXT: vptest %xmm0, %xmm0 1388; AVX1-NEXT: sete %al 1389; AVX1-NEXT: vzeroupper 1390; AVX1-NEXT: retq 1391; 1392; AVX2-LABEL: bool_reduction_v8i32: 1393; AVX2: # %bb.0: 1394; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm1 1395; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0 1396; AVX2-NEXT: vptest %ymm0, %ymm0 1397; AVX2-NEXT: sete %al 1398; AVX2-NEXT: vzeroupper 1399; AVX2-NEXT: retq 1400; 1401; AVX512-LABEL: bool_reduction_v8i32: 1402; AVX512: # %bb.0: 1403; AVX512-NEXT: vpcmpleud %ymm1, %ymm0, %k0 1404; AVX512-NEXT: kmovd %k0, %eax 1405; AVX512-NEXT: cmpb $-1, %al 1406; AVX512-NEXT: sete %al 1407; AVX512-NEXT: vzeroupper 1408; AVX512-NEXT: retq 1409 %a = icmp ule <8 x i32> %x, %y 1410 %s1 = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 1411 %b = and <8 x i1> %s1, %a 1412 %s2 = shufflevector <8 x i1> %b, <8 x i1> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1413 %c = and <8 x i1> %s2, %b 1414 %s3 = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1415 %d = and <8 x i1> %s3, %c 1416 %e = extractelement <8 x i1> %d, i32 0 1417 ret i1 %e 1418} 1419 1420define i1 @bool_reduction_v16i16(<16 x i16> %x, <16 x i16> %y) { 1421; SSE2-LABEL: bool_reduction_v16i16: 1422; SSE2: # %bb.0: 1423; SSE2-NEXT: pcmpeqb %xmm3, %xmm1 1424; SSE2-NEXT: pcmpeqb %xmm2, %xmm0 1425; SSE2-NEXT: pand %xmm1, %xmm0 1426; SSE2-NEXT: pmovmskb %xmm0, %eax 1427; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF 1428; SSE2-NEXT: sete %al 1429; SSE2-NEXT: retq 1430; 1431; SSE42-LABEL: bool_reduction_v16i16: 1432; SSE42: # %bb.0: 1433; SSE42-NEXT: pxor %xmm3, %xmm1 1434; SSE42-NEXT: pxor %xmm2, %xmm0 1435; SSE42-NEXT: por %xmm1, %xmm0 1436; SSE42-NEXT: ptest %xmm0, %xmm0 1437; SSE42-NEXT: sete %al 1438; SSE42-NEXT: retq 1439; 1440; AVX1-LABEL: bool_reduction_v16i16: 1441; AVX1: # %bb.0: 1442; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0 1443; AVX1-NEXT: vptest %ymm0, %ymm0 1444; AVX1-NEXT: sete %al 1445; AVX1-NEXT: vzeroupper 1446; AVX1-NEXT: retq 1447; 1448; AVX2-LABEL: bool_reduction_v16i16: 1449; AVX2: # %bb.0: 1450; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0 1451; AVX2-NEXT: vptest %ymm0, %ymm0 1452; AVX2-NEXT: sete %al 1453; AVX2-NEXT: vzeroupper 1454; AVX2-NEXT: retq 1455; 1456; AVX512-LABEL: bool_reduction_v16i16: 1457; AVX512: # %bb.0: 1458; AVX512-NEXT: vpxor %ymm1, %ymm0, %ymm0 1459; AVX512-NEXT: vptest %ymm0, %ymm0 1460; AVX512-NEXT: sete %al 1461; AVX512-NEXT: vzeroupper 1462; AVX512-NEXT: retq 1463 %a = icmp eq <16 x i16> %x, %y 1464 %s1 = shufflevector <16 x i1> %a, <16 x i1> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1465 %b = and <16 x i1> %s1, %a 1466 %s2 = shufflevector <16 x i1> %b, <16 x i1> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1467 %c = and <16 x i1> %s2, %b 1468 %s3 = shufflevector <16 x i1> %c, <16 x i1> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1469 %d = and <16 x i1> %s3, %c 1470 %s4 = shufflevector <16 x i1> %d, <16 x i1> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1471 %e = and <16 x i1> %s4, %d 1472 %f = extractelement <16 x i1> %e, i32 0 1473 ret i1 %f 1474} 1475 1476define i1 @bool_reduction_v32i8(<32 x i8> %x, <32 x i8> %y) { 1477; SSE2-LABEL: bool_reduction_v32i8: 1478; SSE2: # %bb.0: 1479; SSE2-NEXT: pcmpeqb %xmm3, %xmm1 1480; SSE2-NEXT: pcmpeqb %xmm2, %xmm0 1481; SSE2-NEXT: pand %xmm1, %xmm0 1482; SSE2-NEXT: pmovmskb %xmm0, %eax 1483; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF 1484; SSE2-NEXT: sete %al 1485; SSE2-NEXT: retq 1486; 1487; SSE42-LABEL: bool_reduction_v32i8: 1488; SSE42: # %bb.0: 1489; SSE42-NEXT: pxor %xmm3, %xmm1 1490; SSE42-NEXT: pxor %xmm2, %xmm0 1491; SSE42-NEXT: por %xmm1, %xmm0 1492; SSE42-NEXT: ptest %xmm0, %xmm0 1493; SSE42-NEXT: sete %al 1494; SSE42-NEXT: retq 1495; 1496; AVX1-LABEL: bool_reduction_v32i8: 1497; AVX1: # %bb.0: 1498; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0 1499; AVX1-NEXT: vptest %ymm0, %ymm0 1500; AVX1-NEXT: sete %al 1501; AVX1-NEXT: vzeroupper 1502; AVX1-NEXT: retq 1503; 1504; AVX2-LABEL: bool_reduction_v32i8: 1505; AVX2: # %bb.0: 1506; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0 1507; AVX2-NEXT: vptest %ymm0, %ymm0 1508; AVX2-NEXT: sete %al 1509; AVX2-NEXT: vzeroupper 1510; AVX2-NEXT: retq 1511; 1512; AVX512-LABEL: bool_reduction_v32i8: 1513; AVX512: # %bb.0: 1514; AVX512-NEXT: vpxor %ymm1, %ymm0, %ymm0 1515; AVX512-NEXT: vptest %ymm0, %ymm0 1516; AVX512-NEXT: sete %al 1517; AVX512-NEXT: vzeroupper 1518; AVX512-NEXT: retq 1519 %a = icmp eq <32 x i8> %x, %y 1520 %s1 = shufflevector <32 x i1> %a, <32 x i1> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1521 %b = and <32 x i1> %s1, %a 1522 %s2 = shufflevector <32 x i1> %b, <32 x i1> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1523 %c = and <32 x i1> %s2, %b 1524 %s3 = shufflevector <32 x i1> %c, <32 x i1> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1525 %d = and <32 x i1> %s3, %c 1526 %s4 = shufflevector <32 x i1> %d, <32 x i1> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1527 %e = and <32 x i1> %s4, %d 1528 %s5 = shufflevector <32 x i1> %e, <32 x i1> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1529 %f = and <32 x i1> %s5, %e 1530 %g = extractelement <32 x i1> %f, i32 0 1531 ret i1 %g 1532} 1533 1534; PR59867 1535define i1 @select_v2i8(ptr %s0, ptr %s1) { 1536; SSE2-LABEL: select_v2i8: 1537; SSE2: # %bb.0: 1538; SSE2-NEXT: movzwl (%rdi), %eax 1539; SSE2-NEXT: movd %eax, %xmm0 1540; SSE2-NEXT: movzwl (%rsi), %eax 1541; SSE2-NEXT: movd %eax, %xmm1 1542; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 1543; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1544; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7] 1545; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] 1546; SSE2-NEXT: movmskpd %xmm0, %eax 1547; SSE2-NEXT: cmpl $3, %eax 1548; SSE2-NEXT: sete %al 1549; SSE2-NEXT: retq 1550; 1551; SSE42-LABEL: select_v2i8: 1552; SSE42: # %bb.0: 1553; SSE42-NEXT: movzwl (%rdi), %eax 1554; SSE42-NEXT: movd %eax, %xmm0 1555; SSE42-NEXT: movzwl (%rsi), %eax 1556; SSE42-NEXT: movd %eax, %xmm1 1557; SSE42-NEXT: pcmpeqb %xmm0, %xmm1 1558; SSE42-NEXT: pmovsxbq %xmm1, %xmm0 1559; SSE42-NEXT: movmskpd %xmm0, %eax 1560; SSE42-NEXT: cmpl $3, %eax 1561; SSE42-NEXT: sete %al 1562; SSE42-NEXT: retq 1563; 1564; AVX1OR2-LABEL: select_v2i8: 1565; AVX1OR2: # %bb.0: 1566; AVX1OR2-NEXT: movzwl (%rdi), %eax 1567; AVX1OR2-NEXT: vmovd %eax, %xmm0 1568; AVX1OR2-NEXT: movzwl (%rsi), %eax 1569; AVX1OR2-NEXT: vmovd %eax, %xmm1 1570; AVX1OR2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 1571; AVX1OR2-NEXT: vpmovsxbq %xmm0, %xmm0 1572; AVX1OR2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 1573; AVX1OR2-NEXT: vtestpd %xmm1, %xmm0 1574; AVX1OR2-NEXT: setb %al 1575; AVX1OR2-NEXT: retq 1576; 1577; AVX512-LABEL: select_v2i8: 1578; AVX512: # %bb.0: 1579; AVX512-NEXT: movzwl (%rdi), %eax 1580; AVX512-NEXT: vmovd %eax, %xmm0 1581; AVX512-NEXT: movzwl (%rsi), %eax 1582; AVX512-NEXT: vmovd %eax, %xmm1 1583; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 1584; AVX512-NEXT: knotw %k0, %k0 1585; AVX512-NEXT: kmovd %k0, %eax 1586; AVX512-NEXT: testb $3, %al 1587; AVX512-NEXT: sete %al 1588; AVX512-NEXT: retq 1589 %v0 = load <2 x i8>, ptr %s0, align 1 1590 %v1 = load <2 x i8>, ptr %s1, align 1 1591 %cmp = icmp eq <2 x i8> %v0, %v1 1592 %cmp0 = extractelement <2 x i1> %cmp, i32 0 1593 %cmp1 = extractelement <2 x i1> %cmp, i32 1 1594 %res = select i1 %cmp0, i1 %cmp1, i1 false 1595 ret i1 %res 1596} 1597 1598define i1 @PR116977(<32 x i8> %a, <32 x i8> %b, <32 x i8> %v) { 1599; SSE-LABEL: PR116977: 1600; SSE: # %bb.0: 1601; SSE-NEXT: pcmpeqb %xmm4, %xmm0 1602; SSE-NEXT: pcmpeqb %xmm5, %xmm1 1603; SSE-NEXT: pcmpeqb %xmm4, %xmm2 1604; SSE-NEXT: pand %xmm0, %xmm2 1605; SSE-NEXT: pcmpeqb %xmm5, %xmm3 1606; SSE-NEXT: pand %xmm1, %xmm3 1607; SSE-NEXT: pand %xmm2, %xmm3 1608; SSE-NEXT: pmovmskb %xmm3, %eax 1609; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF 1610; SSE-NEXT: sete %al 1611; SSE-NEXT: retq 1612; 1613; AVX1-LABEL: PR116977: 1614; AVX1: # %bb.0: 1615; AVX1-NEXT: vpcmpeqb %xmm0, %xmm2, %xmm3 1616; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1617; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4 1618; AVX1-NEXT: vpcmpeqb %xmm0, %xmm4, %xmm0 1619; AVX1-NEXT: vpcmpeqb %xmm1, %xmm2, %xmm2 1620; AVX1-NEXT: vpand %xmm2, %xmm3, %xmm2 1621; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 1622; AVX1-NEXT: vpcmpeqb %xmm1, %xmm4, %xmm1 1623; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 1624; AVX1-NEXT: vpand %xmm0, %xmm2, %xmm0 1625; AVX1-NEXT: vpmovmskb %xmm0, %eax 1626; AVX1-NEXT: xorl $65535, %eax # imm = 0xFFFF 1627; AVX1-NEXT: sete %al 1628; AVX1-NEXT: vzeroupper 1629; AVX1-NEXT: retq 1630; 1631; AVX2-LABEL: PR116977: 1632; AVX2: # %bb.0: 1633; AVX2-NEXT: vpxor %ymm0, %ymm2, %ymm0 1634; AVX2-NEXT: vpxor %ymm1, %ymm2, %ymm1 1635; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 1636; AVX2-NEXT: vptest %ymm0, %ymm0 1637; AVX2-NEXT: sete %al 1638; AVX2-NEXT: vzeroupper 1639; AVX2-NEXT: retq 1640; 1641; AVX512-LABEL: PR116977: 1642; AVX512: # %bb.0: 1643; AVX512-NEXT: vpcmpneqb %ymm0, %ymm2, %k0 1644; AVX512-NEXT: vpcmpneqb %ymm1, %ymm2, %k1 1645; AVX512-NEXT: kortestd %k1, %k0 1646; AVX512-NEXT: sete %al 1647; AVX512-NEXT: vzeroupper 1648; AVX512-NEXT: retq 1649 %ca = icmp ne <32 x i8> %v, %a 1650 %cb = icmp ne <32 x i8> %v, %b 1651 %or = or <32 x i1> %ca, %cb 1652 %scl = bitcast <32 x i1> %or to i32 1653 %cmp = icmp eq i32 %scl, 0 1654 ret i1 %cmp 1655} 1656