1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2 3; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42 4; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX1 5; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX2 6; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512 7 8define i64 @test_v2f64_sext(<2 x double> %a0, <2 x double> %a1) { 9; SSE-LABEL: test_v2f64_sext: 10; SSE: # %bb.0: 11; SSE-NEXT: cmpltpd %xmm0, %xmm1 12; SSE-NEXT: movmskpd %xmm1, %ecx 13; SSE-NEXT: xorl %eax, %eax 14; SSE-NEXT: negl %ecx 15; SSE-NEXT: sbbq %rax, %rax 16; SSE-NEXT: retq 17; 18; AVX-LABEL: test_v2f64_sext: 19; AVX: # %bb.0: 20; AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 21; AVX-NEXT: xorl %eax, %eax 22; AVX-NEXT: vtestpd %xmm0, %xmm0 23; AVX-NEXT: setne %al 24; AVX-NEXT: negq %rax 25; AVX-NEXT: retq 26 %c = fcmp ogt <2 x double> %a0, %a1 27 %s = sext <2 x i1> %c to <2 x i64> 28 %1 = shufflevector <2 x i64> %s, <2 x i64> undef, <2 x i32> <i32 1, i32 undef> 29 %2 = or <2 x i64> %s, %1 30 %3 = extractelement <2 x i64> %2, i32 0 31 ret i64 %3 32} 33 34define i64 @test_v4f64_sext(<4 x double> %a0, <4 x double> %a1) { 35; SSE-LABEL: test_v4f64_sext: 36; SSE: # %bb.0: 37; SSE-NEXT: cmpltpd %xmm1, %xmm3 38; SSE-NEXT: cmpltpd %xmm0, %xmm2 39; SSE-NEXT: orpd %xmm3, %xmm2 40; SSE-NEXT: movmskpd %xmm2, %ecx 41; SSE-NEXT: xorl %eax, %eax 42; SSE-NEXT: negl %ecx 43; SSE-NEXT: sbbq %rax, %rax 44; SSE-NEXT: retq 45; 46; AVX-LABEL: test_v4f64_sext: 47; AVX: # %bb.0: 48; AVX-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0 49; AVX-NEXT: xorl %eax, %eax 50; AVX-NEXT: vtestpd %ymm0, %ymm0 51; AVX-NEXT: setne %al 52; AVX-NEXT: negq %rax 53; AVX-NEXT: vzeroupper 54; AVX-NEXT: retq 55 %c = fcmp ogt <4 
x double> %a0, %a1 56 %s = sext <4 x i1> %c to <4 x i64> 57 %1 = shufflevector <4 x i64> %s, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 58 %2 = or <4 x i64> %s, %1 59 %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 60 %4 = or <4 x i64> %2, %3 61 %5 = extractelement <4 x i64> %4, i64 0 62 ret i64 %5 63} 64 65define i64 @test_v4f64_legal_sext(<4 x double> %a0, <4 x double> %a1) { 66; SSE-LABEL: test_v4f64_legal_sext: 67; SSE: # %bb.0: 68; SSE-NEXT: cmpltpd %xmm1, %xmm3 69; SSE-NEXT: cmpltpd %xmm0, %xmm2 70; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2] 71; SSE-NEXT: movmskps %xmm2, %ecx 72; SSE-NEXT: xorl %eax, %eax 73; SSE-NEXT: negl %ecx 74; SSE-NEXT: sbbq %rax, %rax 75; SSE-NEXT: retq 76; 77; AVX1OR2-LABEL: test_v4f64_legal_sext: 78; AVX1OR2: # %bb.0: 79; AVX1OR2-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0 80; AVX1OR2-NEXT: vextractf128 $1, %ymm0, %xmm1 81; AVX1OR2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 82; AVX1OR2-NEXT: xorl %eax, %eax 83; AVX1OR2-NEXT: vtestps %xmm0, %xmm0 84; AVX1OR2-NEXT: setne %al 85; AVX1OR2-NEXT: negq %rax 86; AVX1OR2-NEXT: vzeroupper 87; AVX1OR2-NEXT: retq 88; 89; AVX512-LABEL: test_v4f64_legal_sext: 90; AVX512: # %bb.0: 91; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %k1 92; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 93; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 94; AVX512-NEXT: xorl %eax, %eax 95; AVX512-NEXT: vtestps %xmm0, %xmm0 96; AVX512-NEXT: setne %al 97; AVX512-NEXT: negq %rax 98; AVX512-NEXT: vzeroupper 99; AVX512-NEXT: retq 100 %c = fcmp ogt <4 x double> %a0, %a1 101 %s = sext <4 x i1> %c to <4 x i32> 102 %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 103 %2 = or <4 x i32> %s, %1 104 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 105 %4 = or <4 x i32> %2, %3 106 %5 = extractelement <4 x i32> %4, i64 0 107 %6 = sext i32 %5 to i64 108 ret i64 %6 109} 110 111define 
i32 @test_v4f32_sext(<4 x float> %a0, <4 x float> %a1) { 112; SSE-LABEL: test_v4f32_sext: 113; SSE: # %bb.0: 114; SSE-NEXT: cmpltps %xmm0, %xmm1 115; SSE-NEXT: movmskps %xmm1, %ecx 116; SSE-NEXT: xorl %eax, %eax 117; SSE-NEXT: negl %ecx 118; SSE-NEXT: sbbl %eax, %eax 119; SSE-NEXT: retq 120; 121; AVX-LABEL: test_v4f32_sext: 122; AVX: # %bb.0: 123; AVX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 124; AVX-NEXT: xorl %eax, %eax 125; AVX-NEXT: vtestps %xmm0, %xmm0 126; AVX-NEXT: setne %al 127; AVX-NEXT: negl %eax 128; AVX-NEXT: retq 129 %c = fcmp ogt <4 x float> %a0, %a1 130 %s = sext <4 x i1> %c to <4 x i32> 131 %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 132 %2 = or <4 x i32> %s, %1 133 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 134 %4 = or <4 x i32> %2, %3 135 %5 = extractelement <4 x i32> %4, i32 0 136 ret i32 %5 137} 138 139define i32 @test_v8f32_sext(<8 x float> %a0, <8 x float> %a1) { 140; SSE-LABEL: test_v8f32_sext: 141; SSE: # %bb.0: 142; SSE-NEXT: cmpltps %xmm1, %xmm3 143; SSE-NEXT: cmpltps %xmm0, %xmm2 144; SSE-NEXT: orps %xmm3, %xmm2 145; SSE-NEXT: movmskps %xmm2, %ecx 146; SSE-NEXT: xorl %eax, %eax 147; SSE-NEXT: negl %ecx 148; SSE-NEXT: sbbl %eax, %eax 149; SSE-NEXT: retq 150; 151; AVX-LABEL: test_v8f32_sext: 152; AVX: # %bb.0: 153; AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 154; AVX-NEXT: xorl %eax, %eax 155; AVX-NEXT: vtestps %ymm0, %ymm0 156; AVX-NEXT: setne %al 157; AVX-NEXT: negl %eax 158; AVX-NEXT: vzeroupper 159; AVX-NEXT: retq 160 %c = fcmp ogt <8 x float> %a0, %a1 161 %s = sext <8 x i1> %c to <8 x i32> 162 %1 = shufflevector <8 x i32> %s, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 163 %2 = or <8 x i32> %s, %1 164 %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 165 %4 = or <8 x i32> %2, %3 166 %5 = 
shufflevector <8 x i32> %4, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 167 %6 = or <8 x i32> %4, %5 168 %7 = extractelement <8 x i32> %6, i32 0 169 ret i32 %7 170} 171 172define i32 @test_v8f32_legal_sext(<8 x float> %a0, <8 x float> %a1) { 173; SSE-LABEL: test_v8f32_legal_sext: 174; SSE: # %bb.0: 175; SSE-NEXT: cmpltps %xmm1, %xmm3 176; SSE-NEXT: cmpltps %xmm0, %xmm2 177; SSE-NEXT: packssdw %xmm3, %xmm2 178; SSE-NEXT: pmovmskb %xmm2, %ecx 179; SSE-NEXT: xorl %eax, %eax 180; SSE-NEXT: negl %ecx 181; SSE-NEXT: sbbl %eax, %eax 182; SSE-NEXT: retq 183; 184; AVX1OR2-LABEL: test_v8f32_legal_sext: 185; AVX1OR2: # %bb.0: 186; AVX1OR2-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 187; AVX1OR2-NEXT: vextractf128 $1, %ymm0, %xmm1 188; AVX1OR2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 189; AVX1OR2-NEXT: vpmovmskb %xmm0, %ecx 190; AVX1OR2-NEXT: xorl %eax, %eax 191; AVX1OR2-NEXT: negl %ecx 192; AVX1OR2-NEXT: sbbl %eax, %eax 193; AVX1OR2-NEXT: vzeroupper 194; AVX1OR2-NEXT: retq 195; 196; AVX512-LABEL: test_v8f32_legal_sext: 197; AVX512: # %bb.0: 198; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %k0 199; AVX512-NEXT: vpmovm2w %k0, %xmm0 200; AVX512-NEXT: vpmovmskb %xmm0, %ecx 201; AVX512-NEXT: xorl %eax, %eax 202; AVX512-NEXT: negl %ecx 203; AVX512-NEXT: sbbl %eax, %eax 204; AVX512-NEXT: vzeroupper 205; AVX512-NEXT: retq 206 %c = fcmp ogt <8 x float> %a0, %a1 207 %s = sext <8 x i1> %c to <8 x i16> 208 %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 209 %2 = or <8 x i16> %s, %1 210 %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 211 %4 = or <8 x i16> %2, %3 212 %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 213 %6 = or <8 x i16> %4, %5 214 %7 = extractelement <8 x 
i16> %6, i32 0 215 %8 = sext i16 %7 to i32 216 ret i32 %8 217} 218 219define i64 @test_v2i64_sext(<2 x i64> %a0, <2 x i64> %a1) { 220; SSE2-LABEL: test_v2i64_sext: 221; SSE2: # %bb.0: 222; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] 223; SSE2-NEXT: pxor %xmm2, %xmm1 224; SSE2-NEXT: pxor %xmm2, %xmm0 225; SSE2-NEXT: movdqa %xmm0, %xmm2 226; SSE2-NEXT: pcmpeqd %xmm1, %xmm2 227; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 228; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2] 229; SSE2-NEXT: pand %xmm2, %xmm1 230; SSE2-NEXT: por %xmm0, %xmm1 231; SSE2-NEXT: movmskpd %xmm1, %ecx 232; SSE2-NEXT: xorl %eax, %eax 233; SSE2-NEXT: negl %ecx 234; SSE2-NEXT: sbbq %rax, %rax 235; SSE2-NEXT: retq 236; 237; SSE42-LABEL: test_v2i64_sext: 238; SSE42: # %bb.0: 239; SSE42-NEXT: pcmpgtq %xmm1, %xmm0 240; SSE42-NEXT: movmskpd %xmm0, %ecx 241; SSE42-NEXT: xorl %eax, %eax 242; SSE42-NEXT: negl %ecx 243; SSE42-NEXT: sbbq %rax, %rax 244; SSE42-NEXT: retq 245; 246; AVX-LABEL: test_v2i64_sext: 247; AVX: # %bb.0: 248; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 249; AVX-NEXT: xorl %eax, %eax 250; AVX-NEXT: vtestpd %xmm0, %xmm0 251; AVX-NEXT: setne %al 252; AVX-NEXT: negq %rax 253; AVX-NEXT: retq 254 %c = icmp sgt <2 x i64> %a0, %a1 255 %s = sext <2 x i1> %c to <2 x i64> 256 %1 = shufflevector <2 x i64> %s, <2 x i64> undef, <2 x i32> <i32 1, i32 undef> 257 %2 = or <2 x i64> %s, %1 258 %3 = extractelement <2 x i64> %2, i32 0 259 ret i64 %3 260} 261 262define i64 @test_v4i64_sext(<4 x i64> %a0, <4 x i64> %a1) { 263; SSE2-LABEL: test_v4i64_sext: 264; SSE2: # %bb.0: 265; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648] 266; SSE2-NEXT: pxor %xmm4, %xmm3 267; SSE2-NEXT: pxor %xmm4, %xmm1 268; SSE2-NEXT: movdqa %xmm1, %xmm5 269; SSE2-NEXT: pcmpeqd %xmm3, %xmm5 270; SSE2-NEXT: pcmpgtd %xmm3, %xmm1 271; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2] 272; SSE2-NEXT: pand %xmm5, %xmm3 273; SSE2-NEXT: por %xmm1, %xmm3 274; SSE2-NEXT: pxor %xmm4, %xmm2 275; SSE2-NEXT: pxor %xmm4, %xmm0 276; 
SSE2-NEXT: movdqa %xmm0, %xmm1 277; SSE2-NEXT: pcmpeqd %xmm2, %xmm1 278; SSE2-NEXT: pcmpgtd %xmm2, %xmm0 279; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2] 280; SSE2-NEXT: pand %xmm1, %xmm2 281; SSE2-NEXT: por %xmm0, %xmm2 282; SSE2-NEXT: por %xmm3, %xmm2 283; SSE2-NEXT: movmskpd %xmm2, %ecx 284; SSE2-NEXT: xorl %eax, %eax 285; SSE2-NEXT: negl %ecx 286; SSE2-NEXT: sbbq %rax, %rax 287; SSE2-NEXT: retq 288; 289; SSE42-LABEL: test_v4i64_sext: 290; SSE42: # %bb.0: 291; SSE42-NEXT: pcmpgtq %xmm3, %xmm1 292; SSE42-NEXT: pcmpgtq %xmm2, %xmm0 293; SSE42-NEXT: por %xmm1, %xmm0 294; SSE42-NEXT: movmskpd %xmm0, %ecx 295; SSE42-NEXT: xorl %eax, %eax 296; SSE42-NEXT: negl %ecx 297; SSE42-NEXT: sbbq %rax, %rax 298; SSE42-NEXT: retq 299; 300; AVX1-LABEL: test_v4i64_sext: 301; AVX1: # %bb.0: 302; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 303; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 304; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 305; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 306; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 307; AVX1-NEXT: xorl %eax, %eax 308; AVX1-NEXT: vtestpd %xmm0, %xmm0 309; AVX1-NEXT: setne %al 310; AVX1-NEXT: negq %rax 311; AVX1-NEXT: vzeroupper 312; AVX1-NEXT: retq 313; 314; AVX2-LABEL: test_v4i64_sext: 315; AVX2: # %bb.0: 316; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 317; AVX2-NEXT: xorl %eax, %eax 318; AVX2-NEXT: vtestpd %ymm0, %ymm0 319; AVX2-NEXT: setne %al 320; AVX2-NEXT: negq %rax 321; AVX2-NEXT: vzeroupper 322; AVX2-NEXT: retq 323; 324; AVX512-LABEL: test_v4i64_sext: 325; AVX512: # %bb.0: 326; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 327; AVX512-NEXT: xorl %eax, %eax 328; AVX512-NEXT: vtestpd %ymm0, %ymm0 329; AVX512-NEXT: setne %al 330; AVX512-NEXT: negq %rax 331; AVX512-NEXT: vzeroupper 332; AVX512-NEXT: retq 333 %c = icmp sgt <4 x i64> %a0, %a1 334 %s = sext <4 x i1> %c to <4 x i64> 335 %1 = shufflevector <4 x i64> %s, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 336 %2 = or <4 x i64> %s, %1 337 %3 = shufflevector <4 x i64> %2, <4 x i64> 
undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 338 %4 = or <4 x i64> %2, %3 339 %5 = extractelement <4 x i64> %4, i64 0 340 ret i64 %5 341} 342 343define i64 @test_v4i64_legal_sext(<4 x i64> %a0, <4 x i64> %a1) { 344; SSE2-LABEL: test_v4i64_legal_sext: 345; SSE2: # %bb.0: 346; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648] 347; SSE2-NEXT: pxor %xmm4, %xmm3 348; SSE2-NEXT: pxor %xmm4, %xmm1 349; SSE2-NEXT: movdqa %xmm1, %xmm5 350; SSE2-NEXT: pcmpgtd %xmm3, %xmm5 351; SSE2-NEXT: pxor %xmm4, %xmm2 352; SSE2-NEXT: pxor %xmm4, %xmm0 353; SSE2-NEXT: movdqa %xmm0, %xmm4 354; SSE2-NEXT: pcmpgtd %xmm2, %xmm4 355; SSE2-NEXT: movdqa %xmm4, %xmm6 356; SSE2-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,2],xmm5[0,2] 357; SSE2-NEXT: pcmpeqd %xmm3, %xmm1 358; SSE2-NEXT: pcmpeqd %xmm2, %xmm0 359; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3] 360; SSE2-NEXT: andps %xmm6, %xmm0 361; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm5[1,3] 362; SSE2-NEXT: orps %xmm0, %xmm4 363; SSE2-NEXT: movmskps %xmm4, %ecx 364; SSE2-NEXT: xorl %eax, %eax 365; SSE2-NEXT: negl %ecx 366; SSE2-NEXT: sbbq %rax, %rax 367; SSE2-NEXT: retq 368; 369; SSE42-LABEL: test_v4i64_legal_sext: 370; SSE42: # %bb.0: 371; SSE42-NEXT: pcmpgtq %xmm3, %xmm1 372; SSE42-NEXT: pcmpgtq %xmm2, %xmm0 373; SSE42-NEXT: packssdw %xmm1, %xmm0 374; SSE42-NEXT: movmskps %xmm0, %ecx 375; SSE42-NEXT: xorl %eax, %eax 376; SSE42-NEXT: negl %ecx 377; SSE42-NEXT: sbbq %rax, %rax 378; SSE42-NEXT: retq 379; 380; AVX1-LABEL: test_v4i64_legal_sext: 381; AVX1: # %bb.0: 382; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 383; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 384; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 385; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 386; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 387; AVX1-NEXT: xorl %eax, %eax 388; AVX1-NEXT: vtestps %xmm0, %xmm0 389; AVX1-NEXT: setne %al 390; AVX1-NEXT: negq %rax 391; AVX1-NEXT: vzeroupper 392; AVX1-NEXT: retq 393; 394; AVX2-LABEL: test_v4i64_legal_sext: 395; AVX2: # %bb.0: 396; 
AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 397; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 398; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 399; AVX2-NEXT: xorl %eax, %eax 400; AVX2-NEXT: vtestps %xmm0, %xmm0 401; AVX2-NEXT: setne %al 402; AVX2-NEXT: negq %rax 403; AVX2-NEXT: vzeroupper 404; AVX2-NEXT: retq 405; 406; AVX512-LABEL: test_v4i64_legal_sext: 407; AVX512: # %bb.0: 408; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %k1 409; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 410; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 411; AVX512-NEXT: xorl %eax, %eax 412; AVX512-NEXT: vtestps %xmm0, %xmm0 413; AVX512-NEXT: setne %al 414; AVX512-NEXT: negq %rax 415; AVX512-NEXT: vzeroupper 416; AVX512-NEXT: retq 417 %c = icmp sgt <4 x i64> %a0, %a1 418 %s = sext <4 x i1> %c to <4 x i32> 419 %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 420 %2 = or <4 x i32> %s, %1 421 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 422 %4 = or <4 x i32> %2, %3 423 %5 = extractelement <4 x i32> %4, i64 0 424 %6 = sext i32 %5 to i64 425 ret i64 %6 426} 427 428define i32 @test_v4i32_sext(<4 x i32> %a0, <4 x i32> %a1) { 429; SSE-LABEL: test_v4i32_sext: 430; SSE: # %bb.0: 431; SSE-NEXT: pcmpgtd %xmm1, %xmm0 432; SSE-NEXT: movmskps %xmm0, %ecx 433; SSE-NEXT: xorl %eax, %eax 434; SSE-NEXT: negl %ecx 435; SSE-NEXT: sbbl %eax, %eax 436; SSE-NEXT: retq 437; 438; AVX-LABEL: test_v4i32_sext: 439; AVX: # %bb.0: 440; AVX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 441; AVX-NEXT: xorl %eax, %eax 442; AVX-NEXT: vtestps %xmm0, %xmm0 443; AVX-NEXT: setne %al 444; AVX-NEXT: negl %eax 445; AVX-NEXT: retq 446 %c = icmp sgt <4 x i32> %a0, %a1 447 %s = sext <4 x i1> %c to <4 x i32> 448 %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 449 %2 = or <4 x i32> %s, %1 450 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 451 %4 = or <4 x i32> %2, 
%3 452 %5 = extractelement <4 x i32> %4, i32 0 453 ret i32 %5 454} 455 456define i32 @test_v8i32_sext(<8 x i32> %a0, <8 x i32> %a1) { 457; SSE-LABEL: test_v8i32_sext: 458; SSE: # %bb.0: 459; SSE-NEXT: pcmpgtd %xmm3, %xmm1 460; SSE-NEXT: pcmpgtd %xmm2, %xmm0 461; SSE-NEXT: por %xmm1, %xmm0 462; SSE-NEXT: movmskps %xmm0, %ecx 463; SSE-NEXT: xorl %eax, %eax 464; SSE-NEXT: negl %ecx 465; SSE-NEXT: sbbl %eax, %eax 466; SSE-NEXT: retq 467; 468; AVX1-LABEL: test_v8i32_sext: 469; AVX1: # %bb.0: 470; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 471; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 472; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2 473; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 474; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 475; AVX1-NEXT: xorl %eax, %eax 476; AVX1-NEXT: vtestps %xmm0, %xmm0 477; AVX1-NEXT: setne %al 478; AVX1-NEXT: negl %eax 479; AVX1-NEXT: vzeroupper 480; AVX1-NEXT: retq 481; 482; AVX2-LABEL: test_v8i32_sext: 483; AVX2: # %bb.0: 484; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 485; AVX2-NEXT: xorl %eax, %eax 486; AVX2-NEXT: vtestps %ymm0, %ymm0 487; AVX2-NEXT: setne %al 488; AVX2-NEXT: negl %eax 489; AVX2-NEXT: vzeroupper 490; AVX2-NEXT: retq 491; 492; AVX512-LABEL: test_v8i32_sext: 493; AVX512: # %bb.0: 494; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 495; AVX512-NEXT: xorl %eax, %eax 496; AVX512-NEXT: vtestps %ymm0, %ymm0 497; AVX512-NEXT: setne %al 498; AVX512-NEXT: negl %eax 499; AVX512-NEXT: vzeroupper 500; AVX512-NEXT: retq 501 %c = icmp sgt <8 x i32> %a0, %a1 502 %s = sext <8 x i1> %c to <8 x i32> 503 %1 = shufflevector <8 x i32> %s, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 504 %2 = or <8 x i32> %s, %1 505 %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 506 %4 = or <8 x i32> %2, %3 507 %5 = shufflevector <8 x i32> %4, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef> 508 %6 = or <8 x i32> %4, %5 509 %7 = extractelement <8 x i32> %6, i32 0 510 ret i32 %7 511} 512 513define i32 @test_v8i32_legal_sext(<8 x i32> %a0, <8 x i32> %a1) { 514; SSE-LABEL: test_v8i32_legal_sext: 515; SSE: # %bb.0: 516; SSE-NEXT: pcmpgtd %xmm3, %xmm1 517; SSE-NEXT: pcmpgtd %xmm2, %xmm0 518; SSE-NEXT: packssdw %xmm1, %xmm0 519; SSE-NEXT: pmovmskb %xmm0, %ecx 520; SSE-NEXT: xorl %eax, %eax 521; SSE-NEXT: negl %ecx 522; SSE-NEXT: sbbl %eax, %eax 523; SSE-NEXT: retq 524; 525; AVX1-LABEL: test_v8i32_legal_sext: 526; AVX1: # %bb.0: 527; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 528; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 529; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2 530; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 531; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 532; AVX1-NEXT: vpmovmskb %xmm0, %ecx 533; AVX1-NEXT: xorl %eax, %eax 534; AVX1-NEXT: negl %ecx 535; AVX1-NEXT: sbbl %eax, %eax 536; AVX1-NEXT: vzeroupper 537; AVX1-NEXT: retq 538; 539; AVX2-LABEL: test_v8i32_legal_sext: 540; AVX2: # %bb.0: 541; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 542; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 543; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 544; AVX2-NEXT: vpmovmskb %xmm0, %ecx 545; AVX2-NEXT: xorl %eax, %eax 546; AVX2-NEXT: negl %ecx 547; AVX2-NEXT: sbbl %eax, %eax 548; AVX2-NEXT: vzeroupper 549; AVX2-NEXT: retq 550; 551; AVX512-LABEL: test_v8i32_legal_sext: 552; AVX512: # %bb.0: 553; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 554; AVX512-NEXT: vpmovm2w %k0, %xmm0 555; AVX512-NEXT: vpmovmskb %xmm0, %ecx 556; AVX512-NEXT: xorl %eax, %eax 557; AVX512-NEXT: negl %ecx 558; AVX512-NEXT: sbbl %eax, %eax 559; AVX512-NEXT: vzeroupper 560; AVX512-NEXT: retq 561 %c = icmp sgt <8 x i32> %a0, %a1 562 %s = sext <8 x i1> %c to <8 x i16> 563 %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 564 %2 = or <8 x i16> %s, %1 565 %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 
3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 566 %4 = or <8 x i16> %2, %3 567 %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 568 %6 = or <8 x i16> %4, %5 569 %7 = extractelement <8 x i16> %6, i32 0 570 %8 = sext i16 %7 to i32 571 ret i32 %8 572} 573 574define i16 @test_v8i16_sext(<8 x i16> %a0, <8 x i16> %a1) { 575; SSE-LABEL: test_v8i16_sext: 576; SSE: # %bb.0: 577; SSE-NEXT: pcmpgtw %xmm1, %xmm0 578; SSE-NEXT: pmovmskb %xmm0, %ecx 579; SSE-NEXT: xorl %eax, %eax 580; SSE-NEXT: negl %ecx 581; SSE-NEXT: sbbl %eax, %eax 582; SSE-NEXT: # kill: def $ax killed $ax killed $eax 583; SSE-NEXT: retq 584; 585; AVX-LABEL: test_v8i16_sext: 586; AVX: # %bb.0: 587; AVX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 588; AVX-NEXT: vpmovmskb %xmm0, %ecx 589; AVX-NEXT: xorl %eax, %eax 590; AVX-NEXT: negl %ecx 591; AVX-NEXT: sbbl %eax, %eax 592; AVX-NEXT: # kill: def $ax killed $ax killed $eax 593; AVX-NEXT: retq 594 %c = icmp sgt <8 x i16> %a0, %a1 595 %s = sext <8 x i1> %c to <8 x i16> 596 %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 597 %2 = or <8 x i16> %s, %1 598 %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 599 %4 = or <8 x i16> %2, %3 600 %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 601 %6 = or <8 x i16> %4, %5 602 %7 = extractelement <8 x i16> %6, i32 0 603 ret i16 %7 604} 605 606define i16 @test_v16i16_sext(<16 x i16> %a0, <16 x i16> %a1) { 607; SSE-LABEL: test_v16i16_sext: 608; SSE: # %bb.0: 609; SSE-NEXT: pcmpgtw %xmm3, %xmm1 610; SSE-NEXT: pcmpgtw %xmm2, %xmm0 611; SSE-NEXT: por %xmm1, %xmm0 612; SSE-NEXT: pmovmskb %xmm0, %ecx 613; SSE-NEXT: xorl %eax, %eax 614; SSE-NEXT: negl %ecx 
615; SSE-NEXT: sbbl %eax, %eax 616; SSE-NEXT: # kill: def $ax killed $ax killed $eax 617; SSE-NEXT: retq 618; 619; AVX1-LABEL: test_v16i16_sext: 620; AVX1: # %bb.0: 621; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 622; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 623; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2 624; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 625; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 626; AVX1-NEXT: vpmovmskb %xmm0, %ecx 627; AVX1-NEXT: xorl %eax, %eax 628; AVX1-NEXT: negl %ecx 629; AVX1-NEXT: sbbl %eax, %eax 630; AVX1-NEXT: # kill: def $ax killed $ax killed $eax 631; AVX1-NEXT: vzeroupper 632; AVX1-NEXT: retq 633; 634; AVX2-LABEL: test_v16i16_sext: 635; AVX2: # %bb.0: 636; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 637; AVX2-NEXT: vpmovmskb %ymm0, %ecx 638; AVX2-NEXT: xorl %eax, %eax 639; AVX2-NEXT: negl %ecx 640; AVX2-NEXT: sbbl %eax, %eax 641; AVX2-NEXT: # kill: def $ax killed $ax killed $eax 642; AVX2-NEXT: vzeroupper 643; AVX2-NEXT: retq 644; 645; AVX512-LABEL: test_v16i16_sext: 646; AVX512: # %bb.0: 647; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 648; AVX512-NEXT: vpmovmskb %ymm0, %ecx 649; AVX512-NEXT: xorl %eax, %eax 650; AVX512-NEXT: negl %ecx 651; AVX512-NEXT: sbbl %eax, %eax 652; AVX512-NEXT: # kill: def $ax killed $ax killed $eax 653; AVX512-NEXT: vzeroupper 654; AVX512-NEXT: retq 655 %c = icmp sgt <16 x i16> %a0, %a1 656 %s = sext <16 x i1> %c to <16 x i16> 657 %1 = shufflevector <16 x i16> %s, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 658 %2 = or <16 x i16> %s, %1 659 %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 660 %4 = or <16 x i16> %2, %3 661 %5 = shufflevector <16 x i16> %4, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, 
i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 662 %6 = or <16 x i16> %4, %5 663 %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 664 %8 = or <16 x i16> %6, %7 665 %9 = extractelement <16 x i16> %8, i32 0 666 ret i16 %9 667} 668 669define i16 @test_v16i16_legal_sext(<16 x i16> %a0, <16 x i16> %a1) { 670; SSE-LABEL: test_v16i16_legal_sext: 671; SSE: # %bb.0: 672; SSE-NEXT: pcmpgtw %xmm3, %xmm1 673; SSE-NEXT: pcmpgtw %xmm2, %xmm0 674; SSE-NEXT: packsswb %xmm1, %xmm0 675; SSE-NEXT: pmovmskb %xmm0, %ecx 676; SSE-NEXT: xorl %eax, %eax 677; SSE-NEXT: negl %ecx 678; SSE-NEXT: sbbl %eax, %eax 679; SSE-NEXT: # kill: def $ax killed $ax killed $eax 680; SSE-NEXT: retq 681; 682; AVX1-LABEL: test_v16i16_legal_sext: 683; AVX1: # %bb.0: 684; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 685; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 686; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2 687; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 688; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0 689; AVX1-NEXT: vpmovmskb %xmm0, %ecx 690; AVX1-NEXT: xorl %eax, %eax 691; AVX1-NEXT: negl %ecx 692; AVX1-NEXT: sbbl %eax, %eax 693; AVX1-NEXT: # kill: def $ax killed $ax killed $eax 694; AVX1-NEXT: vzeroupper 695; AVX1-NEXT: retq 696; 697; AVX2-LABEL: test_v16i16_legal_sext: 698; AVX2: # %bb.0: 699; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 700; AVX2-NEXT: vpmovmskb %ymm0, %ecx 701; AVX2-NEXT: xorl %eax, %eax 702; AVX2-NEXT: negl %ecx 703; AVX2-NEXT: sbbl %eax, %eax 704; AVX2-NEXT: # kill: def $ax killed $ax killed $eax 705; AVX2-NEXT: vzeroupper 706; AVX2-NEXT: retq 707; 708; AVX512-LABEL: test_v16i16_legal_sext: 709; AVX512: # %bb.0: 710; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 711; AVX512-NEXT: vpmovm2b %k0, %xmm0 712; AVX512-NEXT: vpmovmskb %xmm0, %ecx 
713; AVX512-NEXT: xorl %eax, %eax 714; AVX512-NEXT: negl %ecx 715; AVX512-NEXT: sbbl %eax, %eax 716; AVX512-NEXT: # kill: def $ax killed $ax killed $eax 717; AVX512-NEXT: vzeroupper 718; AVX512-NEXT: retq 719 %c = icmp sgt <16 x i16> %a0, %a1 720 %s = sext <16 x i1> %c to <16 x i8> 721 %1 = shufflevector <16 x i8> %s, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 722 %2 = or <16 x i8> %s, %1 723 %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 724 %4 = or <16 x i8> %2, %3 725 %5 = shufflevector <16 x i8> %4, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 726 %6 = or <16 x i8> %4, %5 727 %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 728 %8 = or <16 x i8> %6, %7 729 %9 = extractelement <16 x i8> %8, i32 0 730 %10 = sext i8 %9 to i16 731 ret i16 %10 732} 733 734define i8 @test_v16i8_sext(<16 x i8> %a0, <16 x i8> %a1) { 735; SSE-LABEL: test_v16i8_sext: 736; SSE: # %bb.0: 737; SSE-NEXT: pcmpgtb %xmm1, %xmm0 738; SSE-NEXT: pmovmskb %xmm0, %ecx 739; SSE-NEXT: xorl %eax, %eax 740; SSE-NEXT: negl %ecx 741; SSE-NEXT: sbbl %eax, %eax 742; SSE-NEXT: # kill: def $al killed $al killed $eax 743; SSE-NEXT: retq 744; 745; AVX-LABEL: test_v16i8_sext: 746; AVX: # %bb.0: 747; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 748; AVX-NEXT: vpmovmskb %xmm0, %ecx 749; AVX-NEXT: xorl %eax, %eax 750; AVX-NEXT: negl %ecx 751; AVX-NEXT: sbbl %eax, %eax 752; AVX-NEXT: # 
kill: def $al killed $al killed $eax 753; AVX-NEXT: retq 754 %c = icmp sgt <16 x i8> %a0, %a1 755 %s = sext <16 x i1> %c to <16 x i8> 756 %1 = shufflevector <16 x i8> %s, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 757 %2 = or <16 x i8> %s, %1 758 %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 759 %4 = or <16 x i8> %2, %3 760 %5 = shufflevector <16 x i8> %4, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 761 %6 = or <16 x i8> %4, %5 762 %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 763 %8 = or <16 x i8> %6, %7 764 %9 = extractelement <16 x i8> %8, i32 0 765 ret i8 %9 766} 767 768define i8 @test_v32i8_sext(<32 x i8> %a0, <32 x i8> %a1) { 769; SSE-LABEL: test_v32i8_sext: 770; SSE: # %bb.0: 771; SSE-NEXT: pcmpgtb %xmm3, %xmm1 772; SSE-NEXT: pcmpgtb %xmm2, %xmm0 773; SSE-NEXT: por %xmm1, %xmm0 774; SSE-NEXT: pmovmskb %xmm0, %ecx 775; SSE-NEXT: xorl %eax, %eax 776; SSE-NEXT: negl %ecx 777; SSE-NEXT: sbbl %eax, %eax 778; SSE-NEXT: # kill: def $al killed $al killed $eax 779; SSE-NEXT: retq 780; 781; AVX1-LABEL: test_v32i8_sext: 782; AVX1: # %bb.0: 783; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 784; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 785; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2 786; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 787; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 788; AVX1-NEXT: vpmovmskb %xmm0, %ecx 789; AVX1-NEXT: xorl %eax, %eax 
790; AVX1-NEXT: negl %ecx 791; AVX1-NEXT: sbbl %eax, %eax 792; AVX1-NEXT: # kill: def $al killed $al killed $eax 793; AVX1-NEXT: vzeroupper 794; AVX1-NEXT: retq 795; 796; AVX2-LABEL: test_v32i8_sext: 797; AVX2: # %bb.0: 798; AVX2-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 799; AVX2-NEXT: vpmovmskb %ymm0, %ecx 800; AVX2-NEXT: xorl %eax, %eax 801; AVX2-NEXT: negl %ecx 802; AVX2-NEXT: sbbl %eax, %eax 803; AVX2-NEXT: # kill: def $al killed $al killed $eax 804; AVX2-NEXT: vzeroupper 805; AVX2-NEXT: retq 806; 807; AVX512-LABEL: test_v32i8_sext: 808; AVX512: # %bb.0: 809; AVX512-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 810; AVX512-NEXT: vpmovmskb %ymm0, %ecx 811; AVX512-NEXT: xorl %eax, %eax 812; AVX512-NEXT: negl %ecx 813; AVX512-NEXT: sbbl %eax, %eax 814; AVX512-NEXT: # kill: def $al killed $al killed $eax 815; AVX512-NEXT: vzeroupper 816; AVX512-NEXT: retq 817 %c = icmp sgt <32 x i8> %a0, %a1 818 %s = sext <32 x i1> %c to <32 x i8> 819 %1 = shufflevector <32 x i8> %s, <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 820 %2 = or <32 x i8> %s, %1 821 %3 = shufflevector <32 x i8> %2, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 822 %4 = or <32 x i8> %2, %3 823 %5 = shufflevector <32 x i8> %4, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 824 %6 = or <32 x i8> %4, %5 825 %7 = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 826 %8 = or <32 x i8> %6, %7 827 %9 = shufflevector <32 x i8> %8, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 828 %10 = or <32 x i8> %8, %9 829 %11 = extractelement <32 x i8> %10, i32 0 830 ret i8 %11 831} 832 833define i1 @bool_reduction_v2f64(<2 x double> %x, <2 x double> %y) { 834; SSE-LABEL: bool_reduction_v2f64: 835; SSE: # %bb.0: 836; SSE-NEXT: cmpltpd %xmm0, %xmm1 837; SSE-NEXT: movmskpd %xmm1, %eax 838; SSE-NEXT: testl %eax, %eax 839; SSE-NEXT: setne %al 840; SSE-NEXT: retq 841; 842; AVX1OR2-LABEL: bool_reduction_v2f64: 843; AVX1OR2: # %bb.0: 844; AVX1OR2-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 845; AVX1OR2-NEXT: vtestpd %xmm0, %xmm0 846; AVX1OR2-NEXT: setne %al 847; AVX1OR2-NEXT: retq 848; 849; AVX512-LABEL: bool_reduction_v2f64: 850; AVX512: # %bb.0: 851; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %k0 852; AVX512-NEXT: kmovd %k0, %eax 853; AVX512-NEXT: testb %al, %al 854; AVX512-NEXT: setne %al 855; AVX512-NEXT: retq 856 %a = fcmp ogt <2 x double> %x, %y 857 %b = shufflevector <2 x i1> %a, <2 x i1> undef, <2 x i32> <i32 1, i32 undef> 858 %c = or <2 x i1> %a, %b 
859 %d = extractelement <2 x i1> %c, i32 0 860 ret i1 %d 861} 862
; Any-of reduction over an 'fcmp oeq' <4 x float> mask: two shuffle+or steps
; (lanes 2-3, then lane 1) fold every lane into lane 0, which is returned.
863define i1 @bool_reduction_v4f32(<4 x float> %x, <4 x float> %y) { 864; SSE-LABEL: bool_reduction_v4f32: 865; SSE: # %bb.0: 866; SSE-NEXT: cmpeqps %xmm1, %xmm0 867; SSE-NEXT: movmskps %xmm0, %eax 868; SSE-NEXT: testl %eax, %eax 869; SSE-NEXT: setne %al 870; SSE-NEXT: retq 871; 872; AVX1OR2-LABEL: bool_reduction_v4f32: 873; AVX1OR2: # %bb.0: 874; AVX1OR2-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 875; AVX1OR2-NEXT: vtestps %xmm0, %xmm0 876; AVX1OR2-NEXT: setne %al 877; AVX1OR2-NEXT: retq 878; 879; AVX512-LABEL: bool_reduction_v4f32: 880; AVX512: # %bb.0: 881; AVX512-NEXT: vcmpeqps %xmm1, %xmm0, %k0 882; AVX512-NEXT: kmovd %k0, %eax 883; AVX512-NEXT: testb %al, %al 884; AVX512-NEXT: setne %al 885; AVX512-NEXT: retq 886 %a = fcmp oeq <4 x float> %x, %y 887 %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 888 %b = or <4 x i1> %s1, %a 889 %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 890 %c = or <4 x i1> %s2, %b 891 %d = extractelement <4 x i1> %c, i32 0 892 ret i1 %d 893} 894
; Any-of reduction over an 'fcmp oge' <4 x double> mask: two shuffle+or steps
; (lanes 2-3, then lane 1) fold every lane into lane 0, which is returned.
895define i1 @bool_reduction_v4f64(<4 x double> %x, <4 x double> %y) { 896; SSE-LABEL: bool_reduction_v4f64: 897; SSE: # %bb.0: 898; SSE-NEXT: cmplepd %xmm1, %xmm3 899; SSE-NEXT: cmplepd %xmm0, %xmm2 900; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2] 901; SSE-NEXT: movmskps %xmm2, %eax 902; SSE-NEXT: testl %eax, %eax 903; SSE-NEXT: setne %al 904; SSE-NEXT: retq 905; 906; AVX1OR2-LABEL: bool_reduction_v4f64: 907; AVX1OR2: # %bb.0: 908; AVX1OR2-NEXT: vcmplepd %ymm0, %ymm1, %ymm0 909; AVX1OR2-NEXT: vtestpd %ymm0, %ymm0 910; AVX1OR2-NEXT: setne %al 911; AVX1OR2-NEXT: vzeroupper 912; AVX1OR2-NEXT: retq 913; 914; AVX512-LABEL: bool_reduction_v4f64: 915; AVX512: # %bb.0: 916; AVX512-NEXT: vcmplepd %ymm0, %ymm1, %k0 917; AVX512-NEXT: kmovd %k0, %eax 918; AVX512-NEXT: testb %al, %al 919; AVX512-NEXT: setne %al 920; AVX512-NEXT: vzeroupper
921; AVX512-NEXT: retq 922 %a = fcmp oge <4 x double> %x, %y 923 %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 924 %b = or <4 x i1> %s1, %a 925 %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 926 %c = or <4 x i1> %s2, %b 927 %d = extractelement <4 x i1> %c, i32 0 928 ret i1 %d 929} 930
; Any-of reduction over an 'fcmp une' <8 x float> mask: three shuffle+or steps
; (lanes 4-7, lanes 2-3, then lane 1) fold every lane into lane 0, which is returned.
931define i1 @bool_reduction_v8f32(<8 x float> %x, <8 x float> %y) { 932; SSE-LABEL: bool_reduction_v8f32: 933; SSE: # %bb.0: 934; SSE-NEXT: cmpneqps %xmm3, %xmm1 935; SSE-NEXT: cmpneqps %xmm2, %xmm0 936; SSE-NEXT: packssdw %xmm1, %xmm0 937; SSE-NEXT: pmovmskb %xmm0, %eax 938; SSE-NEXT: testl %eax, %eax 939; SSE-NEXT: setne %al 940; SSE-NEXT: retq 941; 942; AVX1OR2-LABEL: bool_reduction_v8f32: 943; AVX1OR2: # %bb.0: 944; AVX1OR2-NEXT: vcmpneqps %ymm1, %ymm0, %ymm0 945; AVX1OR2-NEXT: vtestps %ymm0, %ymm0 946; AVX1OR2-NEXT: setne %al 947; AVX1OR2-NEXT: vzeroupper 948; AVX1OR2-NEXT: retq 949; 950; AVX512-LABEL: bool_reduction_v8f32: 951; AVX512: # %bb.0: 952; AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k0 953; AVX512-NEXT: kmovd %k0, %eax 954; AVX512-NEXT: testb %al, %al 955; AVX512-NEXT: setne %al 956; AVX512-NEXT: vzeroupper 957; AVX512-NEXT: retq 958 %a = fcmp une <8 x float> %x, %y 959 %s1 = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 960 %b = or <8 x i1> %s1, %a 961 %s2 = shufflevector <8 x i1> %b, <8 x i1> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 962 %c = or <8 x i1> %s2, %b 963 %s3 = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 964 %d = or <8 x i1> %s3, %c 965 %e = extractelement <8 x i1> %d, i32 0 966 ret i1 %e 967} 968
; Any-of reduction over an 'icmp ne' <2 x i64> mask: lane 1 is shuffled into
; lane 0, OR-ed with the original mask, and lane 0 is returned as the i1 result.
969define i1 @bool_reduction_v2i64(<2 x i64> %x, <2 x i64> %y) { 970; SSE2-LABEL: bool_reduction_v2i64: 971; SSE2: # %bb.0: 972; SSE2-NEXT:
pcmpeqd %xmm1, %xmm0 973; SSE2-NEXT: movmskps %xmm0, %eax 974; SSE2-NEXT: xorl $15, %eax 975; SSE2-NEXT: setne %al 976; SSE2-NEXT: retq 977; 978; SSE42-LABEL: bool_reduction_v2i64: 979; SSE42: # %bb.0: 980; SSE42-NEXT: pxor %xmm1, %xmm0 981; SSE42-NEXT: ptest %xmm0, %xmm0 982; SSE42-NEXT: setne %al 983; SSE42-NEXT: retq 984; 985; AVX-LABEL: bool_reduction_v2i64: 986; AVX: # %bb.0: 987; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 988; AVX-NEXT: vptest %xmm0, %xmm0 989; AVX-NEXT: setne %al 990; AVX-NEXT: retq 991 %a = icmp ne <2 x i64> %x, %y 992 %b = shufflevector <2 x i1> %a, <2 x i1> undef, <2 x i32> <i32 1, i32 undef> 993 %c = or <2 x i1> %a, %b 994 %d = extractelement <2 x i1> %c, i32 0 995 ret i1 %d 996} 997
; Any-of reduction over an 'icmp ugt' <4 x i32> mask: two shuffle+or steps
; (lanes 2-3, then lane 1) fold every lane into lane 0, which is returned.
998define i1 @bool_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) { 999; SSE2-LABEL: bool_reduction_v4i32: 1000; SSE2: # %bb.0: 1001; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] 1002; SSE2-NEXT: pxor %xmm2, %xmm1 1003; SSE2-NEXT: pxor %xmm2, %xmm0 1004; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 1005; SSE2-NEXT: movmskps %xmm0, %eax 1006; SSE2-NEXT: testl %eax, %eax 1007; SSE2-NEXT: setne %al 1008; SSE2-NEXT: retq 1009; 1010; SSE42-LABEL: bool_reduction_v4i32: 1011; SSE42: # %bb.0: 1012; SSE42-NEXT: pminud %xmm0, %xmm1 1013; SSE42-NEXT: pxor %xmm0, %xmm1 1014; SSE42-NEXT: ptest %xmm1, %xmm1 1015; SSE42-NEXT: setne %al 1016; SSE42-NEXT: retq 1017; 1018; AVX1OR2-LABEL: bool_reduction_v4i32: 1019; AVX1OR2: # %bb.0: 1020; AVX1OR2-NEXT: vpminud %xmm1, %xmm0, %xmm1 1021; AVX1OR2-NEXT: vpxor %xmm1, %xmm0, %xmm0 1022; AVX1OR2-NEXT: vptest %xmm0, %xmm0 1023; AVX1OR2-NEXT: setne %al 1024; AVX1OR2-NEXT: retq 1025; 1026; AVX512-LABEL: bool_reduction_v4i32: 1027; AVX512: # %bb.0: 1028; AVX512-NEXT: vpcmpnleud %xmm1, %xmm0, %k0 1029; AVX512-NEXT: kmovd %k0, %eax 1030; AVX512-NEXT: testb %al, %al 1031; AVX512-NEXT: setne %al 1032; AVX512-NEXT: retq 1033 %a = icmp ugt <4 x i32> %x, %y 1034 %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32
3, i32 undef, i32 undef> 1035 %b = or <4 x i1> %s1, %a 1036 %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 1037 %c = or <4 x i1> %s2, %b 1038 %d = extractelement <4 x i1> %c, i32 0 1039 ret i1 %d 1040} 1041
; Any-of reduction over an 'icmp slt' <8 x i16> mask: three shuffle+or steps
; (lanes 4-7, lanes 2-3, then lane 1) fold every lane into lane 0, which is returned.
1042define i1 @bool_reduction_v8i16(<8 x i16> %x, <8 x i16> %y) { 1043; SSE-LABEL: bool_reduction_v8i16: 1044; SSE: # %bb.0: 1045; SSE-NEXT: pcmpgtw %xmm0, %xmm1 1046; SSE-NEXT: pmovmskb %xmm1, %eax 1047; SSE-NEXT: testl %eax, %eax 1048; SSE-NEXT: setne %al 1049; SSE-NEXT: retq 1050; 1051; AVX1OR2-LABEL: bool_reduction_v8i16: 1052; AVX1OR2: # %bb.0: 1053; AVX1OR2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 1054; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax 1055; AVX1OR2-NEXT: testl %eax, %eax 1056; AVX1OR2-NEXT: setne %al 1057; AVX1OR2-NEXT: retq 1058; 1059; AVX512-LABEL: bool_reduction_v8i16: 1060; AVX512: # %bb.0: 1061; AVX512-NEXT: vpcmpgtw %xmm0, %xmm1, %k0 1062; AVX512-NEXT: kmovd %k0, %eax 1063; AVX512-NEXT: testb %al, %al 1064; AVX512-NEXT: setne %al 1065; AVX512-NEXT: retq 1066 %a = icmp slt <8 x i16> %x, %y 1067 %s1 = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 1068 %b = or <8 x i1> %s1, %a 1069 %s2 = shufflevector <8 x i1> %b, <8 x i1> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1070 %c = or <8 x i1> %s2, %b 1071 %s3 = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1072 %d = or <8 x i1> %s3, %c 1073 %e = extractelement <8 x i1> %d, i32 0 1074 ret i1 %e 1075} 1076
; Any-of reduction over an 'icmp sgt' <16 x i8> mask: four shuffle+or steps
; (lanes 8-15, 4-7, 2-3, then lane 1) fold every lane into lane 0, which is returned.
1077define i1 @bool_reduction_v16i8(<16 x i8> %x, <16 x i8> %y) { 1078; SSE-LABEL: bool_reduction_v16i8: 1079; SSE: # %bb.0: 1080; SSE-NEXT: pcmpgtb %xmm1, %xmm0 1081; SSE-NEXT: pmovmskb %xmm0, %eax 1082; SSE-NEXT: testl %eax, %eax 1083; SSE-NEXT: setne %al 1084; SSE-NEXT: retq 1085; 1086; AVX1OR2-LABEL: bool_reduction_v16i8: 1087;
AVX1OR2: # %bb.0: 1088; AVX1OR2-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 1089; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax 1090; AVX1OR2-NEXT: testl %eax, %eax 1091; AVX1OR2-NEXT: setne %al 1092; AVX1OR2-NEXT: retq 1093; 1094; AVX512-LABEL: bool_reduction_v16i8: 1095; AVX512: # %bb.0: 1096; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 1097; AVX512-NEXT: kortestw %k0, %k0 1098; AVX512-NEXT: setne %al 1099; AVX512-NEXT: retq 1100 %a = icmp sgt <16 x i8> %x, %y 1101 %s1 = shufflevector <16 x i1> %a, <16 x i1> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1102 %b = or <16 x i1> %s1, %a 1103 %s2 = shufflevector <16 x i1> %b, <16 x i1> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1104 %c = or <16 x i1> %s2, %b 1105 %s3 = shufflevector <16 x i1> %c, <16 x i1> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1106 %d = or <16 x i1> %s3, %c 1107 %s4 = shufflevector <16 x i1> %d, <16 x i1> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1108 %e = or <16 x i1> %s4, %d 1109 %f = extractelement <16 x i1> %e, i32 0 1110 ret i1 %f 1111} 1112
; Any-of reduction over an 'icmp slt' <4 x i64> mask: two shuffle+or steps
; (lanes 2-3, then lane 1) fold every lane into lane 0, which is returned.
1113define i1 @bool_reduction_v4i64(<4 x i64> %x, <4 x i64> %y) { 1114; SSE2-LABEL: bool_reduction_v4i64: 1115; SSE2: # %bb.0: 1116; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648] 1117; SSE2-NEXT: pxor %xmm4, %xmm1 1118; SSE2-NEXT: pxor %xmm4, %xmm3 1119; SSE2-NEXT: movdqa %xmm3, %xmm5 1120; SSE2-NEXT: pcmpgtd %xmm1, %xmm5 1121; SSE2-NEXT: pxor %xmm4, %xmm0 1122; SSE2-NEXT: pxor %xmm4, %xmm2 1123; SSE2-NEXT: movdqa %xmm2,
%xmm4 1124; SSE2-NEXT: pcmpgtd %xmm0, %xmm4 1125; SSE2-NEXT: movdqa %xmm4, %xmm6 1126; SSE2-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,2],xmm5[0,2] 1127; SSE2-NEXT: pcmpeqd %xmm1, %xmm3 1128; SSE2-NEXT: pcmpeqd %xmm0, %xmm2 1129; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,3],xmm3[1,3] 1130; SSE2-NEXT: andps %xmm6, %xmm2 1131; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm5[1,3] 1132; SSE2-NEXT: orps %xmm2, %xmm4 1133; SSE2-NEXT: movmskps %xmm4, %eax 1134; SSE2-NEXT: testl %eax, %eax 1135; SSE2-NEXT: setne %al 1136; SSE2-NEXT: retq 1137; 1138; SSE42-LABEL: bool_reduction_v4i64: 1139; SSE42: # %bb.0: 1140; SSE42-NEXT: pcmpgtq %xmm1, %xmm3 1141; SSE42-NEXT: pcmpgtq %xmm0, %xmm2 1142; SSE42-NEXT: packssdw %xmm3, %xmm2 1143; SSE42-NEXT: movmskps %xmm2, %eax 1144; SSE42-NEXT: testl %eax, %eax 1145; SSE42-NEXT: setne %al 1146; SSE42-NEXT: retq 1147; 1148; AVX1-LABEL: bool_reduction_v4i64: 1149; AVX1: # %bb.0: 1150; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1151; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 1152; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 1153; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 1154; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 1155; AVX1-NEXT: vtestpd %xmm0, %xmm0 1156; AVX1-NEXT: setne %al 1157; AVX1-NEXT: vzeroupper 1158; AVX1-NEXT: retq 1159; 1160; AVX2-LABEL: bool_reduction_v4i64: 1161; AVX2: # %bb.0: 1162; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 1163; AVX2-NEXT: vtestpd %ymm0, %ymm0 1164; AVX2-NEXT: setne %al 1165; AVX2-NEXT: vzeroupper 1166; AVX2-NEXT: retq 1167; 1168; AVX512-LABEL: bool_reduction_v4i64: 1169; AVX512: # %bb.0: 1170; AVX512-NEXT: vpcmpgtq %ymm0, %ymm1, %k0 1171; AVX512-NEXT: kmovd %k0, %eax 1172; AVX512-NEXT: testb %al, %al 1173; AVX512-NEXT: setne %al 1174; AVX512-NEXT: vzeroupper 1175; AVX512-NEXT: retq 1176 %a = icmp slt <4 x i64> %x, %y 1177 %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 1178 %b = or <4 x i1> %s1, %a 1179 %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> <i32 1, i32 undef, 
i32 undef, i32 undef> 1180 %c = or <4 x i1> %s2, %b 1181 %d = extractelement <4 x i1> %c, i32 0 1182 ret i1 %d 1183} 1184
; Any-of reduction over an 'icmp ule' <8 x i32> mask: three shuffle+or steps
; (lanes 4-7, lanes 2-3, then lane 1) fold every lane into lane 0, which is returned.
1185define i1 @bool_reduction_v8i32(<8 x i32> %x, <8 x i32> %y) { 1186; SSE2-LABEL: bool_reduction_v8i32: 1187; SSE2: # %bb.0: 1188; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648] 1189; SSE2-NEXT: pxor %xmm4, %xmm3 1190; SSE2-NEXT: pxor %xmm4, %xmm1 1191; SSE2-NEXT: pcmpgtd %xmm3, %xmm1 1192; SSE2-NEXT: pxor %xmm4, %xmm2 1193; SSE2-NEXT: pxor %xmm4, %xmm0 1194; SSE2-NEXT: pcmpgtd %xmm2, %xmm0 1195; SSE2-NEXT: packssdw %xmm1, %xmm0 1196; SSE2-NEXT: packsswb %xmm0, %xmm0 1197; SSE2-NEXT: pmovmskb %xmm0, %eax 1198; SSE2-NEXT: xorb $-1, %al 1199; SSE2-NEXT: setne %al 1200; SSE2-NEXT: retq 1201; 1202; SSE42-LABEL: bool_reduction_v8i32: 1203; SSE42: # %bb.0: 1204; SSE42-NEXT: pminud %xmm1, %xmm3 1205; SSE42-NEXT: pcmpeqd %xmm1, %xmm3 1206; SSE42-NEXT: pminud %xmm0, %xmm2 1207; SSE42-NEXT: pcmpeqd %xmm0, %xmm2 1208; SSE42-NEXT: packssdw %xmm3, %xmm2 1209; SSE42-NEXT: pmovmskb %xmm2, %eax 1210; SSE42-NEXT: testl %eax, %eax 1211; SSE42-NEXT: setne %al 1212; SSE42-NEXT: retq 1213; 1214; AVX1-LABEL: bool_reduction_v8i32: 1215; AVX1: # %bb.0: 1216; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1217; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1218; AVX1-NEXT: vpminud %xmm2, %xmm3, %xmm2 1219; AVX1-NEXT: vpcmpeqd %xmm2, %xmm3, %xmm2 1220; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm1 1221; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1222; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 1223; AVX1-NEXT: vtestps %xmm0, %xmm0 1224; AVX1-NEXT: setne %al 1225; AVX1-NEXT: vzeroupper 1226; AVX1-NEXT: retq 1227; 1228; AVX2-LABEL: bool_reduction_v8i32: 1229; AVX2: # %bb.0: 1230; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm1 1231; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 1232; AVX2-NEXT: vtestps %ymm0, %ymm0 1233; AVX2-NEXT: setne %al 1234; AVX2-NEXT: vzeroupper 1235; AVX2-NEXT: retq 1236; 1237; AVX512-LABEL: bool_reduction_v8i32: 1238; AVX512: # %bb.0: 1239; AVX512-NEXT:
vpcmpleud %ymm1, %ymm0, %k0 1240; AVX512-NEXT: kmovd %k0, %eax 1241; AVX512-NEXT: testb %al, %al 1242; AVX512-NEXT: setne %al 1243; AVX512-NEXT: vzeroupper 1244; AVX512-NEXT: retq 1245 %a = icmp ule <8 x i32> %x, %y 1246 %s1 = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 1247 %b = or <8 x i1> %s1, %a 1248 %s2 = shufflevector <8 x i1> %b, <8 x i1> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1249 %c = or <8 x i1> %s2, %b 1250 %s3 = shufflevector <8 x i1> %c, <8 x i1> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1251 %d = or <8 x i1> %s3, %c 1252 %e = extractelement <8 x i1> %d, i32 0 1253 ret i1 %e 1254} 1255
; Any-of reduction over an 'icmp eq' <16 x i16> mask: four shuffle+or steps
; (lanes 8-15, 4-7, 2-3, then lane 1) fold every lane into lane 0, which is returned.
1256define i1 @bool_reduction_v16i16(<16 x i16> %x, <16 x i16> %y) { 1257; SSE-LABEL: bool_reduction_v16i16: 1258; SSE: # %bb.0: 1259; SSE-NEXT: pcmpeqw %xmm3, %xmm1 1260; SSE-NEXT: pcmpeqw %xmm2, %xmm0 1261; SSE-NEXT: packsswb %xmm1, %xmm0 1262; SSE-NEXT: pmovmskb %xmm0, %eax 1263; SSE-NEXT: testl %eax, %eax 1264; SSE-NEXT: setne %al 1265; SSE-NEXT: retq 1266; 1267; AVX1-LABEL: bool_reduction_v16i16: 1268; AVX1: # %bb.0: 1269; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1270; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1271; AVX1-NEXT: vpcmpeqw %xmm2, %xmm3, %xmm2 1272; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 1273; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0 1274; AVX1-NEXT: vpmovmskb %xmm0, %eax 1275; AVX1-NEXT: testl %eax, %eax 1276; AVX1-NEXT: setne %al 1277; AVX1-NEXT: vzeroupper 1278; AVX1-NEXT: retq 1279; 1280; AVX2-LABEL: bool_reduction_v16i16: 1281; AVX2: # %bb.0: 1282; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 1283; AVX2-NEXT: vpmovmskb %ymm0, %eax 1284; AVX2-NEXT: testl %eax, %eax 1285; AVX2-NEXT: setne %al 1286; AVX2-NEXT: vzeroupper 1287; AVX2-NEXT: retq 1288; 1289; AVX512-LABEL: bool_reduction_v16i16: 1290; AVX512: # %bb.0: 1291; AVX512-NEXT: vpcmpeqw %ymm1, %ymm0, %k0
1292; AVX512-NEXT: kortestw %k0, %k0 1293; AVX512-NEXT: setne %al 1294; AVX512-NEXT: vzeroupper 1295; AVX512-NEXT: retq 1296 %a = icmp eq <16 x i16> %x, %y 1297 %s1 = shufflevector <16 x i1> %a, <16 x i1> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1298 %b = or <16 x i1> %s1, %a 1299 %s2 = shufflevector <16 x i1> %b, <16 x i1> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1300 %c = or <16 x i1> %s2, %b 1301 %s3 = shufflevector <16 x i1> %c, <16 x i1> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1302 %d = or <16 x i1> %s3, %c 1303 %s4 = shufflevector <16 x i1> %d, <16 x i1> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1304 %e = or <16 x i1> %s4, %d 1305 %f = extractelement <16 x i1> %e, i32 0 1306 ret i1 %f 1307} 1308
; Any-of reduction over an 'icmp eq' <32 x i8> mask: five shuffle+or steps
; (lanes 16-31, 8-15, 4-7, 2-3, then lane 1) fold every lane into lane 0, which is returned.
1309define i1 @bool_reduction_v32i8(<32 x i8> %x, <32 x i8> %y) { 1310; SSE-LABEL: bool_reduction_v32i8: 1311; SSE: # %bb.0: 1312; SSE-NEXT: pcmpeqb %xmm3, %xmm1 1313; SSE-NEXT: pcmpeqb %xmm2, %xmm0 1314; SSE-NEXT: por %xmm1, %xmm0 1315; SSE-NEXT: pmovmskb %xmm0, %eax 1316; SSE-NEXT: testl %eax, %eax 1317; SSE-NEXT: setne %al 1318; SSE-NEXT: retq 1319; 1320; AVX1-LABEL: bool_reduction_v32i8: 1321; AVX1: # %bb.0: 1322; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1323; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1324; AVX1-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm2 1325; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 1326; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 1327; AVX1-NEXT: vpmovmskb %xmm0, %eax 1328; AVX1-NEXT: testl %eax, %eax 1329;
AVX1-NEXT: setne %al 1330; AVX1-NEXT: vzeroupper 1331; AVX1-NEXT: retq 1332; 1333; AVX2-LABEL: bool_reduction_v32i8: 1334; AVX2: # %bb.0: 1335; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 1336; AVX2-NEXT: vpmovmskb %ymm0, %eax 1337; AVX2-NEXT: testl %eax, %eax 1338; AVX2-NEXT: setne %al 1339; AVX2-NEXT: vzeroupper 1340; AVX2-NEXT: retq 1341; 1342; AVX512-LABEL: bool_reduction_v32i8: 1343; AVX512: # %bb.0: 1344; AVX512-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 1345; AVX512-NEXT: kortestd %k0, %k0 1346; AVX512-NEXT: setne %al 1347; AVX512-NEXT: vzeroupper 1348; AVX512-NEXT: retq 1349 %a = icmp eq <32 x i8> %x, %y 1350 %s1 = shufflevector <32 x i1> %a, <32 x i1> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1351 %b = or <32 x i1> %s1, %a 1352 %s2 = shufflevector <32 x i1> %b, <32 x i1> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1353 %c = or <32 x i1> %s2, %b 1354 %s3 = shufflevector <32 x i1> %c, <32 x i1> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1355 %d = or <32 x i1> %s3, %c 1356 %s4 = shufflevector <32 x i1> %d, <32 x i1> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1357 %e = or <32 x i1> %s4, %d 1358 %s5 = shufflevector <32 x i1> %e, <32 x i1> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1359 %f = or <32 x i1> %s5, %e 1360 %g = extractelement <32 x i1> %f, i32 0 1361 ret i1 %g 1362} 1363 1364define {i32, i1} @test_v16i8_muti_uses(<16 x i8> %x, <16 x i8>%y, <16 x i8> %z) { 1365; SSE-LABEL: test_v16i8_muti_uses: 1366; SSE: # %bb.0: 1367; SSE-NEXT: pcmpeqb %xmm1, %xmm0 1368; SSE-NEXT: pcmpeqb %xmm1, %xmm2 1369; SSE-NEXT: pmovmskb %xmm0, %ecx 1370; SSE-NEXT: pmovmskb %xmm2, %eax 1371; SSE-NEXT: shll $16, %eax 1372; SSE-NEXT: orl %ecx, %eax 1373; SSE-NEXT: sete %dl 1374; SSE-NEXT: retq 1375; 1376; AVX1-LABEL: test_v16i8_muti_uses: 1377; AVX1: # %bb.0: 1378; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 1379; AVX1-NEXT: vpcmpeqb %xmm1, %xmm2, %xmm1 1380; AVX1-NEXT: vpmovmskb %xmm0, %ecx 1381; AVX1-NEXT: vpmovmskb %xmm1, %eax 1382; AVX1-NEXT: shll $16, %eax 1383; AVX1-NEXT: orl %ecx, %eax 1384; AVX1-NEXT: sete %dl 1385; AVX1-NEXT: retq 1386; 1387; AVX2-LABEL: test_v16i8_muti_uses: 1388; AVX2: # %bb.0: 1389; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 1390; AVX2-NEXT: vpcmpeqb %xmm1, %xmm2, %xmm1 1391; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 1392; AVX2-NEXT: vpmovmskb %ymm0, %eax 1393; AVX2-NEXT: testl %eax, %eax 1394; AVX2-NEXT: sete %dl 1395; AVX2-NEXT: vzeroupper 1396; AVX2-NEXT: retq 1397; 1398; AVX512-LABEL: test_v16i8_muti_uses: 1399; 
AVX512: # %bb.0: 1400; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 1401; AVX512-NEXT: vpcmpeqb %xmm1, %xmm2, %k1 1402; AVX512-NEXT: kunpckwd %k0, %k1, %k0 1403; AVX512-NEXT: kortestd %k0, %k0 1404; AVX512-NEXT: kmovd %k0, %eax 1405; AVX512-NEXT: sete %dl 1406; AVX512-NEXT: retq 1407 %t1 = icmp eq <16 x i8> %x, %y 1408 %t2 = icmp eq <16 x i8> %z, %y 1409 %a = shufflevector <16 x i1> %t1, <16 x i1> %t2, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 1410 %b = bitcast <32 x i1> %a to i32 1411 %c = icmp eq i32 %b, 0 1412 %r1 = insertvalue {i32, i1} poison, i32 %b, 0 1413 %r2 = insertvalue {i32, i1} %r1, i1 %c, 1 1414 ret {i32, i1} %r2 1415} 1416 1417; PR59867
; Loads two <2 x i8> vectors (align 1), compares them with 'icmp eq', and returns
; 'select i1 %cmp0, i1 true, i1 %cmp1' on the two extracted lanes, i.e. true when
; either lane compares equal.
1418define i1 @select_v2i8(ptr %s0, ptr %s1) { 1419; SSE2-LABEL: select_v2i8: 1420; SSE2: # %bb.0: 1421; SSE2-NEXT: movzwl (%rdi), %eax 1422; SSE2-NEXT: movd %eax, %xmm0 1423; SSE2-NEXT: movzwl (%rsi), %eax 1424; SSE2-NEXT: movd %eax, %xmm1 1425; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 1426; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1427; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7] 1428; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] 1429; SSE2-NEXT: movmskpd %xmm0, %eax 1430; SSE2-NEXT: testl %eax, %eax 1431; SSE2-NEXT: setne %al 1432; SSE2-NEXT: retq 1433; 1434; SSE42-LABEL: select_v2i8: 1435; SSE42: # %bb.0: 1436; SSE42-NEXT: movzwl (%rdi), %eax 1437; SSE42-NEXT: movd %eax, %xmm0 1438; SSE42-NEXT: movzwl (%rsi), %eax 1439; SSE42-NEXT: movd %eax, %xmm1 1440; SSE42-NEXT: pcmpeqb %xmm0, %xmm1 1441; SSE42-NEXT: pmovsxbq %xmm1, %xmm0 1442; SSE42-NEXT: movmskpd %xmm0, %eax 1443; SSE42-NEXT: testl %eax, %eax 1444; SSE42-NEXT: setne %al 1445; SSE42-NEXT: retq 1446; 1447; AVX1OR2-LABEL: select_v2i8: 1448; AVX1OR2: # %bb.0: 1449; AVX1OR2-NEXT: movzwl (%rdi), %eax 1450;
AVX1OR2-NEXT: vmovd %eax, %xmm0 1451; AVX1OR2-NEXT: movzwl (%rsi), %eax 1452; AVX1OR2-NEXT: vmovd %eax, %xmm1 1453; AVX1OR2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 1454; AVX1OR2-NEXT: vpmovsxbq %xmm0, %xmm0 1455; AVX1OR2-NEXT: vtestpd %xmm0, %xmm0 1456; AVX1OR2-NEXT: setne %al 1457; AVX1OR2-NEXT: retq 1458; 1459; AVX512-LABEL: select_v2i8: 1460; AVX512: # %bb.0: 1461; AVX512-NEXT: movzwl (%rdi), %eax 1462; AVX512-NEXT: vmovd %eax, %xmm0 1463; AVX512-NEXT: movzwl (%rsi), %eax 1464; AVX512-NEXT: vmovd %eax, %xmm1 1465; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 1466; AVX512-NEXT: kmovd %k0, %eax 1467; AVX512-NEXT: testb $3, %al 1468; AVX512-NEXT: setne %al 1469; AVX512-NEXT: retq 1470 %v0 = load <2 x i8>, ptr %s0, align 1 1471 %v1 = load <2 x i8>, ptr %s1, align 1 1472 %cmp = icmp eq <2 x i8> %v0, %v1 1473 %cmp0 = extractelement <2 x i1> %cmp, i32 0 1474 %cmp1 = extractelement <2 x i1> %cmp, i32 1 1475 %res = select i1 %cmp0, i1 true, i1 %cmp1 1476 ret i1 %res 1477} 1478