1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE42 4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1 5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F 7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512DQ 8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512BW 9 10; 11; 128-bit vector comparisons 12; 13 14define <2 x i1> @test_cmp_v2f64(<2 x double> %a0, <2 x double> %a1) nounwind { 15; SSE-LABEL: test_cmp_v2f64: 16; SSE: # %bb.0: 17; SSE-NEXT: cmpltpd %xmm0, %xmm1 18; SSE-NEXT: movapd %xmm1, %xmm0 19; SSE-NEXT: retq 20; 21; AVX-LABEL: test_cmp_v2f64: 22; AVX: # %bb.0: 23; AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 24; AVX-NEXT: retq 25 %1 = fcmp ogt <2 x double> %a0, %a1 26 ret <2 x i1> %1 27} 28 29define <4 x i1> @test_cmp_v4f32(<4 x float> %a0, <4 x float> %a1) nounwind { 30; SSE-LABEL: test_cmp_v4f32: 31; SSE: # %bb.0: 32; SSE-NEXT: cmpltps %xmm0, %xmm1 33; SSE-NEXT: movaps %xmm1, %xmm0 34; SSE-NEXT: retq 35; 36; AVX-LABEL: test_cmp_v4f32: 37; AVX: # %bb.0: 38; AVX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 39; AVX-NEXT: retq 40 %1 = fcmp ogt <4 x float> %a0, %a1 41 ret <4 x i1> %1 42} 43 44define <2 x i1> @test_cmp_v2i64(<2 x i64> %a0, <2 x i64> %a1) nounwind { 45; SSE2-LABEL: test_cmp_v2i64: 46; SSE2: # %bb.0: 47; SSE2-NEXT: movdqa {{.*#+}} xmm2 = 
[2147483648,2147483648] 48; SSE2-NEXT: pxor %xmm2, %xmm1 49; SSE2-NEXT: pxor %xmm2, %xmm0 50; SSE2-NEXT: movdqa %xmm0, %xmm2 51; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 52; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 53; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 54; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 55; SSE2-NEXT: pand %xmm3, %xmm1 56; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 57; SSE2-NEXT: por %xmm1, %xmm0 58; SSE2-NEXT: retq 59; 60; SSE42-LABEL: test_cmp_v2i64: 61; SSE42: # %bb.0: 62; SSE42-NEXT: pcmpgtq %xmm1, %xmm0 63; SSE42-NEXT: retq 64; 65; AVX-LABEL: test_cmp_v2i64: 66; AVX: # %bb.0: 67; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 68; AVX-NEXT: retq 69 %1 = icmp sgt <2 x i64> %a0, %a1 70 ret <2 x i1> %1 71} 72 73define <4 x i1> @test_cmp_v4i32(<4 x i32> %a0, <4 x i32> %a1) nounwind { 74; SSE-LABEL: test_cmp_v4i32: 75; SSE: # %bb.0: 76; SSE-NEXT: pcmpgtd %xmm1, %xmm0 77; SSE-NEXT: retq 78; 79; AVX-LABEL: test_cmp_v4i32: 80; AVX: # %bb.0: 81; AVX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 82; AVX-NEXT: retq 83 %1 = icmp sgt <4 x i32> %a0, %a1 84 ret <4 x i1> %1 85} 86 87define <8 x i1> @test_cmp_v8i16(<8 x i16> %a0, <8 x i16> %a1) nounwind { 88; SSE-LABEL: test_cmp_v8i16: 89; SSE: # %bb.0: 90; SSE-NEXT: pcmpgtw %xmm1, %xmm0 91; SSE-NEXT: retq 92; 93; AVX-LABEL: test_cmp_v8i16: 94; AVX: # %bb.0: 95; AVX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 96; AVX-NEXT: retq 97 %1 = icmp sgt <8 x i16> %a0, %a1 98 ret <8 x i1> %1 99} 100 101define <16 x i1> @test_cmp_v16i8(<16 x i8> %a0, <16 x i8> %a1) nounwind { 102; SSE-LABEL: test_cmp_v16i8: 103; SSE: # %bb.0: 104; SSE-NEXT: pcmpgtb %xmm1, %xmm0 105; SSE-NEXT: retq 106; 107; AVX-LABEL: test_cmp_v16i8: 108; AVX: # %bb.0: 109; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 110; AVX-NEXT: retq 111 %1 = icmp sgt <16 x i8> %a0, %a1 112 ret <16 x i1> %1 113} 114 115; 116; 256-bit vector comparisons 117; 118 119define <4 x i1> @test_cmp_v4f64(<4 x double> %a0, <4 x double> %a1) nounwind { 120; SSE-LABEL: test_cmp_v4f64: 121; SSE: # %bb.0: 122; 
SSE-NEXT: cmpltpd %xmm1, %xmm3 123; SSE-NEXT: cmpltpd %xmm0, %xmm2 124; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2] 125; SSE-NEXT: movaps %xmm2, %xmm0 126; SSE-NEXT: retq 127; 128; AVX1-LABEL: test_cmp_v4f64: 129; AVX1: # %bb.0: 130; AVX1-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0 131; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 132; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 133; AVX1-NEXT: vzeroupper 134; AVX1-NEXT: retq 135; 136; AVX2-LABEL: test_cmp_v4f64: 137; AVX2: # %bb.0: 138; AVX2-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0 139; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1 140; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 141; AVX2-NEXT: vzeroupper 142; AVX2-NEXT: retq 143; 144; AVX512-LABEL: test_cmp_v4f64: 145; AVX512: # %bb.0: 146; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0 147; AVX512-NEXT: vpmovqd %zmm0, %ymm0 148; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 149; AVX512-NEXT: vzeroupper 150; AVX512-NEXT: retq 151 %1 = fcmp ogt <4 x double> %a0, %a1 152 ret <4 x i1> %1 153} 154 155define <8 x i1> @test_cmp_v8f32(<8 x float> %a0, <8 x float> %a1) nounwind { 156; SSE-LABEL: test_cmp_v8f32: 157; SSE: # %bb.0: 158; SSE-NEXT: cmpltps %xmm1, %xmm3 159; SSE-NEXT: cmpltps %xmm0, %xmm2 160; SSE-NEXT: packssdw %xmm3, %xmm2 161; SSE-NEXT: movdqa %xmm2, %xmm0 162; SSE-NEXT: retq 163; 164; AVX1-LABEL: test_cmp_v8f32: 165; AVX1: # %bb.0: 166; AVX1-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 167; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 168; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 169; AVX1-NEXT: vzeroupper 170; AVX1-NEXT: retq 171; 172; AVX2-LABEL: test_cmp_v8f32: 173; AVX2: # %bb.0: 174; AVX2-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 175; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1 176; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 177; AVX2-NEXT: vzeroupper 178; AVX2-NEXT: retq 179; 180; AVX512-LABEL: test_cmp_v8f32: 181; AVX512: # %bb.0: 182; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 183; AVX512-NEXT: vpmovdw %zmm0, %ymm0 184; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 185; 
AVX512-NEXT: vzeroupper 186; AVX512-NEXT: retq 187 %1 = fcmp ogt <8 x float> %a0, %a1 188 ret <8 x i1> %1 189} 190 191define <4 x i1> @test_cmp_v4i64(<4 x i64> %a0, <4 x i64> %a1) nounwind { 192; SSE2-LABEL: test_cmp_v4i64: 193; SSE2: # %bb.0: 194; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648] 195; SSE2-NEXT: pxor %xmm4, %xmm3 196; SSE2-NEXT: pxor %xmm4, %xmm1 197; SSE2-NEXT: movdqa %xmm1, %xmm5 198; SSE2-NEXT: pcmpgtd %xmm3, %xmm5 199; SSE2-NEXT: pxor %xmm4, %xmm2 200; SSE2-NEXT: pxor %xmm4, %xmm0 201; SSE2-NEXT: movdqa %xmm0, %xmm4 202; SSE2-NEXT: pcmpgtd %xmm2, %xmm4 203; SSE2-NEXT: movdqa %xmm4, %xmm6 204; SSE2-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,2],xmm5[0,2] 205; SSE2-NEXT: pcmpeqd %xmm3, %xmm1 206; SSE2-NEXT: pcmpeqd %xmm2, %xmm0 207; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3] 208; SSE2-NEXT: andps %xmm6, %xmm0 209; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm5[1,3] 210; SSE2-NEXT: orps %xmm4, %xmm0 211; SSE2-NEXT: retq 212; 213; SSE42-LABEL: test_cmp_v4i64: 214; SSE42: # %bb.0: 215; SSE42-NEXT: pcmpgtq %xmm3, %xmm1 216; SSE42-NEXT: pcmpgtq %xmm2, %xmm0 217; SSE42-NEXT: packssdw %xmm1, %xmm0 218; SSE42-NEXT: retq 219; 220; AVX1-LABEL: test_cmp_v4i64: 221; AVX1: # %bb.0: 222; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 223; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 224; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 225; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 226; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 227; AVX1-NEXT: vzeroupper 228; AVX1-NEXT: retq 229; 230; AVX2-LABEL: test_cmp_v4i64: 231; AVX2: # %bb.0: 232; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 233; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 234; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 235; AVX2-NEXT: vzeroupper 236; AVX2-NEXT: retq 237; 238; AVX512-LABEL: test_cmp_v4i64: 239; AVX512: # %bb.0: 240; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 241; AVX512-NEXT: vpmovqd %zmm0, %ymm0 242; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 243; AVX512-NEXT: vzeroupper 244; AVX512-NEXT: 
retq 245 %1 = icmp sgt <4 x i64> %a0, %a1 246 ret <4 x i1> %1 247} 248 249define <8 x i1> @test_cmp_v8i32(<8 x i32> %a0, <8 x i32> %a1) nounwind { 250; SSE-LABEL: test_cmp_v8i32: 251; SSE: # %bb.0: 252; SSE-NEXT: pcmpgtd %xmm3, %xmm1 253; SSE-NEXT: pcmpgtd %xmm2, %xmm0 254; SSE-NEXT: packssdw %xmm1, %xmm0 255; SSE-NEXT: retq 256; 257; AVX1-LABEL: test_cmp_v8i32: 258; AVX1: # %bb.0: 259; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 260; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 261; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2 262; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 263; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 264; AVX1-NEXT: vzeroupper 265; AVX1-NEXT: retq 266; 267; AVX2-LABEL: test_cmp_v8i32: 268; AVX2: # %bb.0: 269; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 270; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 271; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 272; AVX2-NEXT: vzeroupper 273; AVX2-NEXT: retq 274; 275; AVX512-LABEL: test_cmp_v8i32: 276; AVX512: # %bb.0: 277; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 278; AVX512-NEXT: vpmovdw %zmm0, %ymm0 279; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 280; AVX512-NEXT: vzeroupper 281; AVX512-NEXT: retq 282 %1 = icmp sgt <8 x i32> %a0, %a1 283 ret <8 x i1> %1 284} 285 286define <16 x i1> @test_cmp_v16i16(<16 x i16> %a0, <16 x i16> %a1) nounwind { 287; SSE-LABEL: test_cmp_v16i16: 288; SSE: # %bb.0: 289; SSE-NEXT: pcmpgtw %xmm3, %xmm1 290; SSE-NEXT: pcmpgtw %xmm2, %xmm0 291; SSE-NEXT: packsswb %xmm1, %xmm0 292; SSE-NEXT: retq 293; 294; AVX1-LABEL: test_cmp_v16i16: 295; AVX1: # %bb.0: 296; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 297; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 298; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2 299; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 300; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0 301; AVX1-NEXT: vzeroupper 302; AVX1-NEXT: retq 303; 304; AVX2-LABEL: test_cmp_v16i16: 305; AVX2: # %bb.0: 306; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 307; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 308; AVX2-NEXT: 
vpacksswb %xmm1, %xmm0, %xmm0 309; AVX2-NEXT: vzeroupper 310; AVX2-NEXT: retq 311; 312; AVX512F-LABEL: test_cmp_v16i16: 313; AVX512F: # %bb.0: 314; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 315; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero 316; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 317; AVX512F-NEXT: vzeroupper 318; AVX512F-NEXT: retq 319; 320; AVX512DQ-LABEL: test_cmp_v16i16: 321; AVX512DQ: # %bb.0: 322; AVX512DQ-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 323; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero 324; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 325; AVX512DQ-NEXT: vzeroupper 326; AVX512DQ-NEXT: retq 327; 328; AVX512BW-LABEL: test_cmp_v16i16: 329; AVX512BW: # %bb.0: 330; AVX512BW-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 331; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 332; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 333; AVX512BW-NEXT: vzeroupper 334; AVX512BW-NEXT: retq 335 %1 = icmp sgt <16 x i16> %a0, %a1 336 ret <16 x i1> %1 337} 338 339define <32 x i1> @test_cmp_v32i8(<32 x i8> %a0, <32 x i8> %a1) nounwind { 340; SSE-LABEL: test_cmp_v32i8: 341; SSE: # %bb.0: 342; SSE-NEXT: movq %rdi, %rax 343; SSE-NEXT: pcmpgtb %xmm2, %xmm0 344; SSE-NEXT: pmovmskb %xmm0, %ecx 345; SSE-NEXT: pcmpgtb %xmm3, %xmm1 346; SSE-NEXT: pmovmskb %xmm1, %edx 347; SSE-NEXT: shll $16, %edx 348; SSE-NEXT: orl %ecx, %edx 349; SSE-NEXT: movl %edx, (%rdi) 350; SSE-NEXT: retq 351; 352; AVX1-LABEL: test_cmp_v32i8: 353; AVX1: # %bb.0: 354; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 355; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 356; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2 357; AVX1-NEXT: vpcmpgtb 
%xmm1, %xmm0, %xmm0 358; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 359; AVX1-NEXT: retq 360; 361; AVX2-LABEL: test_cmp_v32i8: 362; AVX2: # %bb.0: 363; AVX2-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 364; AVX2-NEXT: retq 365; 366; AVX512-LABEL: test_cmp_v32i8: 367; AVX512: # %bb.0: 368; AVX512-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 369; AVX512-NEXT: retq 370 %1 = icmp sgt <32 x i8> %a0, %a1 371 ret <32 x i1> %1 372} 373 374; 375; 512-bit vector comparisons 376; 377 378define <8 x i1> @test_cmp_v8f64(<8 x double> %a0, <8 x double> %a1) nounwind { 379; SSE-LABEL: test_cmp_v8f64: 380; SSE: # %bb.0: 381; SSE-NEXT: cmpltpd %xmm3, %xmm7 382; SSE-NEXT: cmpltpd %xmm2, %xmm6 383; SSE-NEXT: packssdw %xmm7, %xmm6 384; SSE-NEXT: cmpltpd %xmm1, %xmm5 385; SSE-NEXT: cmpltpd %xmm0, %xmm4 386; SSE-NEXT: packssdw %xmm5, %xmm4 387; SSE-NEXT: packssdw %xmm6, %xmm4 388; SSE-NEXT: movdqa %xmm4, %xmm0 389; SSE-NEXT: retq 390; 391; AVX1-LABEL: test_cmp_v8f64: 392; AVX1: # %bb.0: 393; AVX1-NEXT: vcmpltpd %ymm1, %ymm3, %ymm1 394; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 395; AVX1-NEXT: vpackssdw %xmm3, %xmm1, %xmm1 396; AVX1-NEXT: vcmpltpd %ymm0, %ymm2, %ymm0 397; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 398; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 399; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 400; AVX1-NEXT: vzeroupper 401; AVX1-NEXT: retq 402; 403; AVX2-LABEL: test_cmp_v8f64: 404; AVX2: # %bb.0: 405; AVX2-NEXT: vcmpltpd %ymm1, %ymm3, %ymm1 406; AVX2-NEXT: vcmpltpd %ymm0, %ymm2, %ymm0 407; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 408; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 409; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 410; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] 411; AVX2-NEXT: vzeroupper 412; AVX2-NEXT: retq 413; 414; AVX512F-LABEL: test_cmp_v8f64: 415; AVX512F: # %bb.0: 416; AVX512F-NEXT: vcmpltpd %zmm0, %zmm1, %k1 417; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 418; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 419; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 
420; AVX512F-NEXT: vzeroupper 421; AVX512F-NEXT: retq 422; 423; AVX512DQ-LABEL: test_cmp_v8f64: 424; AVX512DQ: # %bb.0: 425; AVX512DQ-NEXT: vcmpltpd %zmm0, %zmm1, %k0 426; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 427; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0 428; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 429; AVX512DQ-NEXT: vzeroupper 430; AVX512DQ-NEXT: retq 431; 432; AVX512BW-LABEL: test_cmp_v8f64: 433; AVX512BW: # %bb.0: 434; AVX512BW-NEXT: vcmpltpd %zmm0, %zmm1, %k0 435; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 436; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 437; AVX512BW-NEXT: vzeroupper 438; AVX512BW-NEXT: retq 439 %1 = fcmp ogt <8 x double> %a0, %a1 440 ret <8 x i1> %1 441} 442 443define <16 x i1> @test_cmp_v16f32(<16 x float> %a0, <16 x float> %a1) nounwind { 444; SSE-LABEL: test_cmp_v16f32: 445; SSE: # %bb.0: 446; SSE-NEXT: cmpltps %xmm3, %xmm7 447; SSE-NEXT: cmpltps %xmm2, %xmm6 448; SSE-NEXT: packssdw %xmm7, %xmm6 449; SSE-NEXT: cmpltps %xmm1, %xmm5 450; SSE-NEXT: cmpltps %xmm0, %xmm4 451; SSE-NEXT: packssdw %xmm5, %xmm4 452; SSE-NEXT: packsswb %xmm6, %xmm4 453; SSE-NEXT: movdqa %xmm4, %xmm0 454; SSE-NEXT: retq 455; 456; AVX1-LABEL: test_cmp_v16f32: 457; AVX1: # %bb.0: 458; AVX1-NEXT: vcmpltps %ymm1, %ymm3, %ymm1 459; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 460; AVX1-NEXT: vpackssdw %xmm3, %xmm1, %xmm1 461; AVX1-NEXT: vcmpltps %ymm0, %ymm2, %ymm0 462; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 463; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 464; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 465; AVX1-NEXT: vzeroupper 466; AVX1-NEXT: retq 467; 468; AVX2-LABEL: test_cmp_v16f32: 469; AVX2: # %bb.0: 470; AVX2-NEXT: vcmpltps %ymm1, %ymm3, %ymm1 471; AVX2-NEXT: vcmpltps %ymm0, %ymm2, %ymm0 472; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 473; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 474; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 475; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] 476; AVX2-NEXT: vzeroupper 477; AVX2-NEXT: retq 478; 479; AVX512F-LABEL: 
test_cmp_v16f32: 480; AVX512F: # %bb.0: 481; AVX512F-NEXT: vcmpltps %zmm0, %zmm1, %k1 482; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 483; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 484; AVX512F-NEXT: vzeroupper 485; AVX512F-NEXT: retq 486; 487; AVX512DQ-LABEL: test_cmp_v16f32: 488; AVX512DQ: # %bb.0: 489; AVX512DQ-NEXT: vcmpltps %zmm0, %zmm1, %k0 490; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 491; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 492; AVX512DQ-NEXT: vzeroupper 493; AVX512DQ-NEXT: retq 494; 495; AVX512BW-LABEL: test_cmp_v16f32: 496; AVX512BW: # %bb.0: 497; AVX512BW-NEXT: vcmpltps %zmm0, %zmm1, %k0 498; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 499; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 500; AVX512BW-NEXT: vzeroupper 501; AVX512BW-NEXT: retq 502 %1 = fcmp ogt <16 x float> %a0, %a1 503 ret <16 x i1> %1 504} 505 506define <8 x i1> @test_cmp_v8i64(<8 x i64> %a0, <8 x i64> %a1) nounwind { 507; SSE2-LABEL: test_cmp_v8i64: 508; SSE2: # %bb.0: 509; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [2147483648,2147483648] 510; SSE2-NEXT: pxor %xmm8, %xmm7 511; SSE2-NEXT: pxor %xmm8, %xmm3 512; SSE2-NEXT: movdqa %xmm3, %xmm9 513; SSE2-NEXT: pcmpgtd %xmm7, %xmm9 514; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm9[0,0,2,2] 515; SSE2-NEXT: pcmpeqd %xmm7, %xmm3 516; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 517; SSE2-NEXT: pand %xmm10, %xmm3 518; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm9[1,1,3,3] 519; SSE2-NEXT: por %xmm3, %xmm7 520; SSE2-NEXT: pxor %xmm8, %xmm6 521; SSE2-NEXT: pxor %xmm8, %xmm2 522; SSE2-NEXT: movdqa %xmm2, %xmm3 523; SSE2-NEXT: pcmpgtd %xmm6, %xmm3 524; SSE2-NEXT: pshufd {{.*#+}} xmm9 = xmm3[0,0,2,2] 525; SSE2-NEXT: pcmpeqd %xmm6, %xmm2 526; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm2[1,1,3,3] 527; SSE2-NEXT: pand %xmm9, %xmm6 528; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3] 529; SSE2-NEXT: por %xmm6, %xmm2 530; SSE2-NEXT: packssdw %xmm7, %xmm2 531; SSE2-NEXT: pxor %xmm8, %xmm5 532; SSE2-NEXT: pxor %xmm8, %xmm1 533; SSE2-NEXT: movdqa %xmm1, %xmm3 534; 
SSE2-NEXT: pcmpgtd %xmm5, %xmm3 535; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2] 536; SSE2-NEXT: pcmpeqd %xmm5, %xmm1 537; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 538; SSE2-NEXT: pand %xmm6, %xmm1 539; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 540; SSE2-NEXT: por %xmm1, %xmm3 541; SSE2-NEXT: pxor %xmm8, %xmm4 542; SSE2-NEXT: pxor %xmm8, %xmm0 543; SSE2-NEXT: movdqa %xmm0, %xmm1 544; SSE2-NEXT: pcmpgtd %xmm4, %xmm1 545; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm1[0,0,2,2] 546; SSE2-NEXT: pcmpeqd %xmm4, %xmm0 547; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3] 548; SSE2-NEXT: pand %xmm5, %xmm4 549; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3] 550; SSE2-NEXT: por %xmm4, %xmm0 551; SSE2-NEXT: packssdw %xmm3, %xmm0 552; SSE2-NEXT: packssdw %xmm2, %xmm0 553; SSE2-NEXT: retq 554; 555; SSE42-LABEL: test_cmp_v8i64: 556; SSE42: # %bb.0: 557; SSE42-NEXT: pcmpgtq %xmm7, %xmm3 558; SSE42-NEXT: pcmpgtq %xmm6, %xmm2 559; SSE42-NEXT: packssdw %xmm3, %xmm2 560; SSE42-NEXT: pcmpgtq %xmm5, %xmm1 561; SSE42-NEXT: pcmpgtq %xmm4, %xmm0 562; SSE42-NEXT: packssdw %xmm1, %xmm0 563; SSE42-NEXT: packssdw %xmm2, %xmm0 564; SSE42-NEXT: retq 565; 566; AVX1-LABEL: test_cmp_v8i64: 567; AVX1: # %bb.0: 568; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4 569; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5 570; AVX1-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4 571; AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm1 572; AVX1-NEXT: vpackssdw %xmm4, %xmm1, %xmm1 573; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3 574; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 575; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3 576; AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm0 577; AVX1-NEXT: vpackssdw %xmm3, %xmm0, %xmm0 578; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 579; AVX1-NEXT: vzeroupper 580; AVX1-NEXT: retq 581; 582; AVX2-LABEL: test_cmp_v8i64: 583; AVX2: # %bb.0: 584; AVX2-NEXT: vpcmpgtq %ymm3, %ymm1, %ymm1 585; AVX2-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm0 586; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 587; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 588; 
AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 589; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] 590; AVX2-NEXT: vzeroupper 591; AVX2-NEXT: retq 592; 593; AVX512F-LABEL: test_cmp_v8i64: 594; AVX512F: # %bb.0: 595; AVX512F-NEXT: vpcmpgtq %zmm1, %zmm0, %k1 596; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 597; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 598; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 599; AVX512F-NEXT: vzeroupper 600; AVX512F-NEXT: retq 601; 602; AVX512DQ-LABEL: test_cmp_v8i64: 603; AVX512DQ: # %bb.0: 604; AVX512DQ-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 605; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 606; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0 607; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 608; AVX512DQ-NEXT: vzeroupper 609; AVX512DQ-NEXT: retq 610; 611; AVX512BW-LABEL: test_cmp_v8i64: 612; AVX512BW: # %bb.0: 613; AVX512BW-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 614; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 615; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 616; AVX512BW-NEXT: vzeroupper 617; AVX512BW-NEXT: retq 618 %1 = icmp sgt <8 x i64> %a0, %a1 619 ret <8 x i1> %1 620} 621 622define <16 x i1> @test_cmp_v16i32(<16 x i32> %a0, <16 x i32> %a1) nounwind { 623; SSE-LABEL: test_cmp_v16i32: 624; SSE: # %bb.0: 625; SSE-NEXT: pcmpgtd %xmm7, %xmm3 626; SSE-NEXT: pcmpgtd %xmm6, %xmm2 627; SSE-NEXT: packssdw %xmm3, %xmm2 628; SSE-NEXT: pcmpgtd %xmm5, %xmm1 629; SSE-NEXT: pcmpgtd %xmm4, %xmm0 630; SSE-NEXT: packssdw %xmm1, %xmm0 631; SSE-NEXT: packsswb %xmm2, %xmm0 632; SSE-NEXT: retq 633; 634; AVX1-LABEL: test_cmp_v16i32: 635; AVX1: # %bb.0: 636; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4 637; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5 638; AVX1-NEXT: vpcmpgtd %xmm4, %xmm5, %xmm4 639; AVX1-NEXT: vpcmpgtd %xmm3, %xmm1, %xmm1 640; AVX1-NEXT: vpackssdw %xmm4, %xmm1, %xmm1 641; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3 642; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 643; AVX1-NEXT: vpcmpgtd %xmm3, %xmm4, %xmm3 644; AVX1-NEXT: vpcmpgtd %xmm2, %xmm0, 
%xmm0 645; AVX1-NEXT: vpackssdw %xmm3, %xmm0, %xmm0 646; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 647; AVX1-NEXT: vzeroupper 648; AVX1-NEXT: retq 649; 650; AVX2-LABEL: test_cmp_v16i32: 651; AVX2: # %bb.0: 652; AVX2-NEXT: vpcmpgtd %ymm3, %ymm1, %ymm1 653; AVX2-NEXT: vpcmpgtd %ymm2, %ymm0, %ymm0 654; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 655; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 656; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 657; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] 658; AVX2-NEXT: vzeroupper 659; AVX2-NEXT: retq 660; 661; AVX512F-LABEL: test_cmp_v16i32: 662; AVX512F: # %bb.0: 663; AVX512F-NEXT: vpcmpgtd %zmm1, %zmm0, %k1 664; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 665; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 666; AVX512F-NEXT: vzeroupper 667; AVX512F-NEXT: retq 668; 669; AVX512DQ-LABEL: test_cmp_v16i32: 670; AVX512DQ: # %bb.0: 671; AVX512DQ-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 672; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 673; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 674; AVX512DQ-NEXT: vzeroupper 675; AVX512DQ-NEXT: retq 676; 677; AVX512BW-LABEL: test_cmp_v16i32: 678; AVX512BW: # %bb.0: 679; AVX512BW-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 680; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 681; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 682; AVX512BW-NEXT: vzeroupper 683; AVX512BW-NEXT: retq 684 %1 = icmp sgt <16 x i32> %a0, %a1 685 ret <16 x i1> %1 686} 687 688define <32 x i1> @test_cmp_v32i16(<32 x i16> %a0, <32 x i16> %a1) nounwind { 689; SSE-LABEL: test_cmp_v32i16: 690; SSE: # %bb.0: 691; SSE-NEXT: movq %rdi, %rax 692; SSE-NEXT: pcmpgtw %xmm5, %xmm1 693; SSE-NEXT: pcmpgtw %xmm4, %xmm0 694; SSE-NEXT: packsswb %xmm1, %xmm0 695; SSE-NEXT: pmovmskb %xmm0, %ecx 696; SSE-NEXT: pcmpgtw %xmm7, %xmm3 697; SSE-NEXT: pcmpgtw %xmm6, %xmm2 698; SSE-NEXT: packsswb %xmm3, %xmm2 699; SSE-NEXT: pmovmskb %xmm2, %edx 700; SSE-NEXT: shll $16, %edx 701; SSE-NEXT: orl %ecx, %edx 702; SSE-NEXT: movl %edx, (%rdi) 703; SSE-NEXT: retq 704; 705; AVX1-LABEL: 
test_cmp_v32i16: 706; AVX1: # %bb.0: 707; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4 708; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5 709; AVX1-NEXT: vpcmpgtw %xmm4, %xmm5, %xmm4 710; AVX1-NEXT: vpcmpgtw %xmm3, %xmm1, %xmm1 711; AVX1-NEXT: vpacksswb %xmm4, %xmm1, %xmm1 712; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3 713; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 714; AVX1-NEXT: vpcmpgtw %xmm3, %xmm4, %xmm3 715; AVX1-NEXT: vpcmpgtw %xmm2, %xmm0, %xmm0 716; AVX1-NEXT: vpacksswb %xmm3, %xmm0, %xmm0 717; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 718; AVX1-NEXT: retq 719; 720; AVX2-LABEL: test_cmp_v32i16: 721; AVX2: # %bb.0: 722; AVX2-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1 723; AVX2-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0 724; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 725; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 726; AVX2-NEXT: retq 727; 728; AVX512F-LABEL: test_cmp_v32i16: 729; AVX512F: # %bb.0: 730; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2 731; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3 732; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2 733; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 734; AVX512F-NEXT: vpacksswb %ymm2, %ymm0, %ymm0 735; AVX512F-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 736; AVX512F-NEXT: retq 737; 738; AVX512DQ-LABEL: test_cmp_v32i16: 739; AVX512DQ: # %bb.0: 740; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm2 741; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm3 742; AVX512DQ-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2 743; AVX512DQ-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 744; AVX512DQ-NEXT: vpacksswb %ymm2, %ymm0, %ymm0 745; AVX512DQ-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 746; AVX512DQ-NEXT: retq 747; 748; AVX512BW-LABEL: test_cmp_v32i16: 749; AVX512BW: # %bb.0: 750; AVX512BW-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 751; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 752; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 753; AVX512BW-NEXT: retq 754 %1 = icmp sgt <32 x i16> %a0, %a1 755 ret <32 x i1> %1 756} 757 758define <64 x i1> @test_cmp_v64i8(<64 x i8> %a0, <64 x i8> 
%a1) nounwind { 759; SSE-LABEL: test_cmp_v64i8: 760; SSE: # %bb.0: 761; SSE-NEXT: movq %rdi, %rax 762; SSE-NEXT: pcmpgtb %xmm4, %xmm0 763; SSE-NEXT: pmovmskb %xmm0, %ecx 764; SSE-NEXT: pcmpgtb %xmm5, %xmm1 765; SSE-NEXT: pmovmskb %xmm1, %edx 766; SSE-NEXT: shll $16, %edx 767; SSE-NEXT: orl %ecx, %edx 768; SSE-NEXT: pcmpgtb %xmm6, %xmm2 769; SSE-NEXT: pmovmskb %xmm2, %ecx 770; SSE-NEXT: pcmpgtb %xmm7, %xmm3 771; SSE-NEXT: pmovmskb %xmm3, %esi 772; SSE-NEXT: shll $16, %esi 773; SSE-NEXT: orl %ecx, %esi 774; SSE-NEXT: shlq $32, %rsi 775; SSE-NEXT: orq %rdx, %rsi 776; SSE-NEXT: movq %rsi, (%rdi) 777; SSE-NEXT: retq 778; 779; AVX1-LABEL: test_cmp_v64i8: 780; AVX1: # %bb.0: 781; AVX1-NEXT: movq %rdi, %rax 782; AVX1-NEXT: vpcmpgtb %xmm2, %xmm0, %xmm4 783; AVX1-NEXT: vpmovmskb %xmm4, %ecx 784; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2 785; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 786; AVX1-NEXT: vpcmpgtb %xmm2, %xmm0, %xmm0 787; AVX1-NEXT: vpmovmskb %xmm0, %edx 788; AVX1-NEXT: shll $16, %edx 789; AVX1-NEXT: orl %ecx, %edx 790; AVX1-NEXT: vpcmpgtb %xmm3, %xmm1, %xmm0 791; AVX1-NEXT: vpmovmskb %xmm0, %ecx 792; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm0 793; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 794; AVX1-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 795; AVX1-NEXT: vpmovmskb %xmm0, %esi 796; AVX1-NEXT: shll $16, %esi 797; AVX1-NEXT: orl %ecx, %esi 798; AVX1-NEXT: shlq $32, %rsi 799; AVX1-NEXT: orq %rdx, %rsi 800; AVX1-NEXT: movq %rsi, (%rdi) 801; AVX1-NEXT: vzeroupper 802; AVX1-NEXT: retq 803; 804; AVX2-LABEL: test_cmp_v64i8: 805; AVX2: # %bb.0: 806; AVX2-NEXT: movq %rdi, %rax 807; AVX2-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0 808; AVX2-NEXT: vpmovmskb %ymm0, %ecx 809; AVX2-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm0 810; AVX2-NEXT: vpmovmskb %ymm0, %edx 811; AVX2-NEXT: shlq $32, %rdx 812; AVX2-NEXT: orq %rcx, %rdx 813; AVX2-NEXT: movq %rdx, (%rdi) 814; AVX2-NEXT: vzeroupper 815; AVX2-NEXT: retq 816; 817; AVX512F-LABEL: test_cmp_v64i8: 818; AVX512F: # %bb.0: 819; AVX512F-NEXT: movq %rdi, %rax 820; 
AVX512F-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm2 821; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm3 822; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k0 823; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm2 824; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm2 825; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k1 826; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1 827; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 828; AVX512F-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 829; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm1 830; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k2 831; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 832; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 833; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k3 834; AVX512F-NEXT: kmovw %k3, 6(%rdi) 835; AVX512F-NEXT: kmovw %k2, 4(%rdi) 836; AVX512F-NEXT: kmovw %k1, 2(%rdi) 837; AVX512F-NEXT: kmovw %k0, (%rdi) 838; AVX512F-NEXT: vzeroupper 839; AVX512F-NEXT: retq 840; 841; AVX512DQ-LABEL: test_cmp_v64i8: 842; AVX512DQ: # %bb.0: 843; AVX512DQ-NEXT: movq %rdi, %rax 844; AVX512DQ-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm2 845; AVX512DQ-NEXT: vpmovsxbd %xmm2, %zmm3 846; AVX512DQ-NEXT: vpmovd2m %zmm3, %k0 847; AVX512DQ-NEXT: vextracti128 $1, %ymm2, %xmm2 848; AVX512DQ-NEXT: vpmovsxbd %xmm2, %zmm2 849; AVX512DQ-NEXT: vpmovd2m %zmm2, %k1 850; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm1 851; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm0 852; AVX512DQ-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 853; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm1 854; AVX512DQ-NEXT: vpmovd2m %zmm1, %k2 855; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm0 856; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 857; AVX512DQ-NEXT: vpmovd2m %zmm0, %k3 858; AVX512DQ-NEXT: kmovw %k3, 6(%rdi) 859; AVX512DQ-NEXT: kmovw %k2, 4(%rdi) 860; AVX512DQ-NEXT: kmovw %k1, 2(%rdi) 861; AVX512DQ-NEXT: kmovw %k0, (%rdi) 862; AVX512DQ-NEXT: vzeroupper 863; AVX512DQ-NEXT: retq 864; 865; AVX512BW-LABEL: test_cmp_v64i8: 866; AVX512BW: # %bb.0: 867; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 868; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 869; AVX512BW-NEXT: retq 870 %1 = icmp sgt <64 x i8> 
%a0, %a1 871 ret <64 x i1> %1 872} 873 874; 875; 1024-bit vector comparisons 876; 877 878define <16 x i1> @test_cmp_v16f64(<16 x double> %a0, <16 x double> %a1) nounwind { 879; SSE-LABEL: test_cmp_v16f64: 880; SSE: # %bb.0: 881; SSE-NEXT: movapd %xmm0, %xmm8 882; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm0 883; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm11 884; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm10 885; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm12 886; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm9 887; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm13 888; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm14 889; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm15 890; SSE-NEXT: cmpltpd %xmm7, %xmm15 891; SSE-NEXT: cmpltpd %xmm6, %xmm14 892; SSE-NEXT: packssdw %xmm15, %xmm14 893; SSE-NEXT: cmpltpd %xmm5, %xmm13 894; SSE-NEXT: cmpltpd %xmm4, %xmm9 895; SSE-NEXT: packssdw %xmm13, %xmm9 896; SSE-NEXT: packssdw %xmm14, %xmm9 897; SSE-NEXT: cmpltpd %xmm3, %xmm12 898; SSE-NEXT: cmpltpd %xmm2, %xmm10 899; SSE-NEXT: packssdw %xmm12, %xmm10 900; SSE-NEXT: cmpltpd %xmm1, %xmm11 901; SSE-NEXT: cmpltpd %xmm8, %xmm0 902; SSE-NEXT: packssdw %xmm11, %xmm0 903; SSE-NEXT: packssdw %xmm10, %xmm0 904; SSE-NEXT: packsswb %xmm9, %xmm0 905; SSE-NEXT: retq 906; 907; AVX1-LABEL: test_cmp_v16f64: 908; AVX1: # %bb.0: 909; AVX1-NEXT: vcmpltpd %ymm3, %ymm7, %ymm3 910; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm7 911; AVX1-NEXT: vpackssdw %xmm7, %xmm3, %xmm3 912; AVX1-NEXT: vcmpltpd %ymm2, %ymm6, %ymm2 913; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm6 914; AVX1-NEXT: vpackssdw %xmm6, %xmm2, %xmm2 915; AVX1-NEXT: vpackssdw %xmm3, %xmm2, %xmm2 916; AVX1-NEXT: vcmpltpd %ymm1, %ymm5, %ymm1 917; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 918; AVX1-NEXT: vpackssdw %xmm3, %xmm1, %xmm1 919; AVX1-NEXT: vcmpltpd %ymm0, %ymm4, %ymm0 920; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 921; AVX1-NEXT: vpackssdw %xmm3, %xmm0, %xmm0 922; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 923; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0 924; AVX1-NEXT: vzeroupper 925; 
AVX1-NEXT: retq 926; 927; AVX2-LABEL: test_cmp_v16f64: 928; AVX2: # %bb.0: 929; AVX2-NEXT: vcmpltpd %ymm3, %ymm7, %ymm3 930; AVX2-NEXT: vcmpltpd %ymm2, %ymm6, %ymm2 931; AVX2-NEXT: vpackssdw %ymm3, %ymm2, %ymm2 932; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,1,3] 933; AVX2-NEXT: vcmpltpd %ymm1, %ymm5, %ymm1 934; AVX2-NEXT: vcmpltpd %ymm0, %ymm4, %ymm0 935; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 936; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 937; AVX2-NEXT: vpackssdw %ymm2, %ymm0, %ymm0 938; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 939; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 940; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] 941; AVX2-NEXT: vzeroupper 942; AVX2-NEXT: retq 943; 944; AVX512F-LABEL: test_cmp_v16f64: 945; AVX512F: # %bb.0: 946; AVX512F-NEXT: vcmpltpd %zmm0, %zmm2, %k0 947; AVX512F-NEXT: vcmpltpd %zmm1, %zmm3, %k1 948; AVX512F-NEXT: kunpckbw %k0, %k1, %k1 949; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 950; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 951; AVX512F-NEXT: vzeroupper 952; AVX512F-NEXT: retq 953; 954; AVX512DQ-LABEL: test_cmp_v16f64: 955; AVX512DQ: # %bb.0: 956; AVX512DQ-NEXT: vcmpltpd %zmm0, %zmm2, %k0 957; AVX512DQ-NEXT: vcmpltpd %zmm1, %zmm3, %k1 958; AVX512DQ-NEXT: kunpckbw %k0, %k1, %k0 959; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 960; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 961; AVX512DQ-NEXT: vzeroupper 962; AVX512DQ-NEXT: retq 963; 964; AVX512BW-LABEL: test_cmp_v16f64: 965; AVX512BW: # %bb.0: 966; AVX512BW-NEXT: vcmpltpd %zmm0, %zmm2, %k0 967; AVX512BW-NEXT: vcmpltpd %zmm1, %zmm3, %k1 968; AVX512BW-NEXT: kunpckbw %k0, %k1, %k0 969; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 970; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 971; AVX512BW-NEXT: vzeroupper 972; AVX512BW-NEXT: retq 973 %1 = fcmp ogt <16 x double> %a0, %a1 974 ret <16 x i1> %1 975} 976 977define <32 x i1> @test_cmp_v32f32(<32 x float> %a0, <32 x float> %a1) nounwind { 978; SSE-LABEL: test_cmp_v32f32: 979; SSE: # %bb.0: 980; SSE-NEXT: movq %rdi, %rax 981; 
SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm8 982; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm10 983; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm9 984; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm11 985; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm12 986; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm13 987; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm14 988; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm15 989; SSE-NEXT: cmpltps %xmm3, %xmm15 990; SSE-NEXT: cmpltps %xmm2, %xmm14 991; SSE-NEXT: packssdw %xmm15, %xmm14 992; SSE-NEXT: cmpltps %xmm1, %xmm13 993; SSE-NEXT: cmpltps %xmm0, %xmm12 994; SSE-NEXT: packssdw %xmm13, %xmm12 995; SSE-NEXT: packsswb %xmm14, %xmm12 996; SSE-NEXT: pmovmskb %xmm12, %ecx 997; SSE-NEXT: cmpltps %xmm7, %xmm11 998; SSE-NEXT: cmpltps %xmm6, %xmm9 999; SSE-NEXT: packssdw %xmm11, %xmm9 1000; SSE-NEXT: cmpltps %xmm5, %xmm10 1001; SSE-NEXT: cmpltps %xmm4, %xmm8 1002; SSE-NEXT: packssdw %xmm10, %xmm8 1003; SSE-NEXT: packsswb %xmm9, %xmm8 1004; SSE-NEXT: pmovmskb %xmm8, %edx 1005; SSE-NEXT: shll $16, %edx 1006; SSE-NEXT: orl %ecx, %edx 1007; SSE-NEXT: movl %edx, (%rdi) 1008; SSE-NEXT: retq 1009; 1010; AVX1-LABEL: test_cmp_v32f32: 1011; AVX1: # %bb.0: 1012; AVX1-NEXT: vcmpltps %ymm3, %ymm7, %ymm3 1013; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm7 1014; AVX1-NEXT: vpackssdw %xmm7, %xmm3, %xmm3 1015; AVX1-NEXT: vcmpltps %ymm2, %ymm6, %ymm2 1016; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm6 1017; AVX1-NEXT: vpackssdw %xmm6, %xmm2, %xmm2 1018; AVX1-NEXT: vpacksswb %xmm3, %xmm2, %xmm2 1019; AVX1-NEXT: vcmpltps %ymm1, %ymm5, %ymm1 1020; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 1021; AVX1-NEXT: vpackssdw %xmm3, %xmm1, %xmm1 1022; AVX1-NEXT: vcmpltps %ymm0, %ymm4, %ymm0 1023; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1024; AVX1-NEXT: vpackssdw %xmm3, %xmm0, %xmm0 1025; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 1026; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1027; AVX1-NEXT: retq 1028; 1029; AVX2-LABEL: test_cmp_v32f32: 1030; AVX2: # %bb.0: 1031; AVX2-NEXT: vcmpltps %ymm3, %ymm7, %ymm3 1032; 
AVX2-NEXT: vcmpltps %ymm2, %ymm6, %ymm2 1033; AVX2-NEXT: vpackssdw %ymm3, %ymm2, %ymm2 1034; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,1,3] 1035; AVX2-NEXT: vcmpltps %ymm1, %ymm5, %ymm1 1036; AVX2-NEXT: vcmpltps %ymm0, %ymm4, %ymm0 1037; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 1038; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 1039; AVX2-NEXT: vpacksswb %ymm2, %ymm0, %ymm0 1040; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 1041; AVX2-NEXT: retq 1042; 1043; AVX512F-LABEL: test_cmp_v32f32: 1044; AVX512F: # %bb.0: 1045; AVX512F-NEXT: vcmpltps %zmm1, %zmm3, %k1 1046; AVX512F-NEXT: vcmpltps %zmm0, %zmm2, %k2 1047; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z} 1048; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 1049; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} 1050; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 1051; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 1052; AVX512F-NEXT: retq 1053; 1054; AVX512DQ-LABEL: test_cmp_v32f32: 1055; AVX512DQ: # %bb.0: 1056; AVX512DQ-NEXT: vcmpltps %zmm1, %zmm3, %k0 1057; AVX512DQ-NEXT: vcmpltps %zmm0, %zmm2, %k1 1058; AVX512DQ-NEXT: vpmovm2d %k1, %zmm0 1059; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 1060; AVX512DQ-NEXT: vpmovm2d %k0, %zmm1 1061; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1 1062; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 1063; AVX512DQ-NEXT: retq 1064; 1065; AVX512BW-LABEL: test_cmp_v32f32: 1066; AVX512BW: # %bb.0: 1067; AVX512BW-NEXT: vcmpltps %zmm0, %zmm2, %k0 1068; AVX512BW-NEXT: vcmpltps %zmm1, %zmm3, %k1 1069; AVX512BW-NEXT: kunpckwd %k0, %k1, %k0 1070; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 1071; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1072; AVX512BW-NEXT: retq 1073 %1 = fcmp ogt <32 x float> %a0, %a1 1074 ret <32 x i1> %1 1075} 1076 1077define <16 x i1> @test_cmp_v16i64(<16 x i64> %a0, <16 x i64> %a1) nounwind { 1078; SSE2-LABEL: test_cmp_v16i64: 1079; SSE2: # %bb.0: 1080; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [2147483648,2147483648] 1081; SSE2-NEXT: pxor %xmm8, %xmm7 1082; SSE2-NEXT: 
movdqa {{[0-9]+}}(%rsp), %xmm9 1083; SSE2-NEXT: pxor %xmm8, %xmm9 1084; SSE2-NEXT: movdqa %xmm7, %xmm10 1085; SSE2-NEXT: pcmpgtd %xmm9, %xmm10 1086; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm10[0,0,2,2] 1087; SSE2-NEXT: pcmpeqd %xmm7, %xmm9 1088; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm9[1,1,3,3] 1089; SSE2-NEXT: pand %xmm11, %xmm7 1090; SSE2-NEXT: pshufd {{.*#+}} xmm9 = xmm10[1,1,3,3] 1091; SSE2-NEXT: por %xmm7, %xmm9 1092; SSE2-NEXT: pxor %xmm8, %xmm6 1093; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm7 1094; SSE2-NEXT: pxor %xmm8, %xmm7 1095; SSE2-NEXT: movdqa %xmm6, %xmm10 1096; SSE2-NEXT: pcmpgtd %xmm7, %xmm10 1097; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm10[0,0,2,2] 1098; SSE2-NEXT: pcmpeqd %xmm6, %xmm7 1099; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm7[1,1,3,3] 1100; SSE2-NEXT: pand %xmm11, %xmm7 1101; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm10[1,1,3,3] 1102; SSE2-NEXT: por %xmm7, %xmm6 1103; SSE2-NEXT: packssdw %xmm9, %xmm6 1104; SSE2-NEXT: pxor %xmm8, %xmm5 1105; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm7 1106; SSE2-NEXT: pxor %xmm8, %xmm7 1107; SSE2-NEXT: movdqa %xmm5, %xmm9 1108; SSE2-NEXT: pcmpgtd %xmm7, %xmm9 1109; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm9[0,0,2,2] 1110; SSE2-NEXT: pcmpeqd %xmm5, %xmm7 1111; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3] 1112; SSE2-NEXT: pand %xmm10, %xmm5 1113; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm9[1,1,3,3] 1114; SSE2-NEXT: por %xmm5, %xmm7 1115; SSE2-NEXT: pxor %xmm8, %xmm4 1116; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm5 1117; SSE2-NEXT: pxor %xmm8, %xmm5 1118; SSE2-NEXT: movdqa %xmm4, %xmm9 1119; SSE2-NEXT: pcmpgtd %xmm5, %xmm9 1120; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm9[0,0,2,2] 1121; SSE2-NEXT: pcmpeqd %xmm4, %xmm5 1122; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3] 1123; SSE2-NEXT: pand %xmm10, %xmm5 1124; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm9[1,1,3,3] 1125; SSE2-NEXT: por %xmm5, %xmm4 1126; SSE2-NEXT: packssdw %xmm7, %xmm4 1127; SSE2-NEXT: packssdw %xmm6, %xmm4 1128; SSE2-NEXT: pxor %xmm8, %xmm3 1129; SSE2-NEXT: movdqa 
{{[0-9]+}}(%rsp), %xmm5 1130; SSE2-NEXT: pxor %xmm8, %xmm5 1131; SSE2-NEXT: movdqa %xmm3, %xmm6 1132; SSE2-NEXT: pcmpgtd %xmm5, %xmm6 1133; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] 1134; SSE2-NEXT: pcmpeqd %xmm3, %xmm5 1135; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm5[1,1,3,3] 1136; SSE2-NEXT: pand %xmm7, %xmm3 1137; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3] 1138; SSE2-NEXT: por %xmm3, %xmm5 1139; SSE2-NEXT: pxor %xmm8, %xmm2 1140; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm3 1141; SSE2-NEXT: pxor %xmm8, %xmm3 1142; SSE2-NEXT: movdqa %xmm2, %xmm6 1143; SSE2-NEXT: pcmpgtd %xmm3, %xmm6 1144; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] 1145; SSE2-NEXT: pcmpeqd %xmm2, %xmm3 1146; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 1147; SSE2-NEXT: pand %xmm7, %xmm3 1148; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3] 1149; SSE2-NEXT: por %xmm3, %xmm2 1150; SSE2-NEXT: packssdw %xmm5, %xmm2 1151; SSE2-NEXT: pxor %xmm8, %xmm1 1152; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm3 1153; SSE2-NEXT: pxor %xmm8, %xmm3 1154; SSE2-NEXT: movdqa %xmm1, %xmm5 1155; SSE2-NEXT: pcmpgtd %xmm3, %xmm5 1156; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2] 1157; SSE2-NEXT: pcmpeqd %xmm1, %xmm3 1158; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3] 1159; SSE2-NEXT: pand %xmm6, %xmm1 1160; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm5[1,1,3,3] 1161; SSE2-NEXT: por %xmm1, %xmm3 1162; SSE2-NEXT: pxor %xmm8, %xmm0 1163; SSE2-NEXT: pxor {{[0-9]+}}(%rsp), %xmm8 1164; SSE2-NEXT: movdqa %xmm0, %xmm1 1165; SSE2-NEXT: pcmpgtd %xmm8, %xmm1 1166; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm1[0,0,2,2] 1167; SSE2-NEXT: pcmpeqd %xmm0, %xmm8 1168; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm8[1,1,3,3] 1169; SSE2-NEXT: pand %xmm5, %xmm6 1170; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3] 1171; SSE2-NEXT: por %xmm6, %xmm0 1172; SSE2-NEXT: packssdw %xmm3, %xmm0 1173; SSE2-NEXT: packssdw %xmm2, %xmm0 1174; SSE2-NEXT: packsswb %xmm4, %xmm0 1175; SSE2-NEXT: retq 1176; 1177; SSE42-LABEL: test_cmp_v16i64: 1178; SSE42: # %bb.0: 1179; 
SSE42-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm7 1180; SSE42-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm6 1181; SSE42-NEXT: packssdw %xmm7, %xmm6 1182; SSE42-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm5 1183; SSE42-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm4 1184; SSE42-NEXT: packssdw %xmm5, %xmm4 1185; SSE42-NEXT: packssdw %xmm6, %xmm4 1186; SSE42-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm3 1187; SSE42-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm2 1188; SSE42-NEXT: packssdw %xmm3, %xmm2 1189; SSE42-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm1 1190; SSE42-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm0 1191; SSE42-NEXT: packssdw %xmm1, %xmm0 1192; SSE42-NEXT: packssdw %xmm2, %xmm0 1193; SSE42-NEXT: packsswb %xmm4, %xmm0 1194; SSE42-NEXT: retq 1195; 1196; AVX1-LABEL: test_cmp_v16i64: 1197; AVX1: # %bb.0: 1198; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm8 1199; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm9 1200; AVX1-NEXT: vpcmpgtq %xmm8, %xmm9, %xmm8 1201; AVX1-NEXT: vpcmpgtq %xmm7, %xmm3, %xmm3 1202; AVX1-NEXT: vpackssdw %xmm8, %xmm3, %xmm3 1203; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm7 1204; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm8 1205; AVX1-NEXT: vpcmpgtq %xmm7, %xmm8, %xmm7 1206; AVX1-NEXT: vpcmpgtq %xmm6, %xmm2, %xmm2 1207; AVX1-NEXT: vpackssdw %xmm7, %xmm2, %xmm2 1208; AVX1-NEXT: vpackssdw %xmm3, %xmm2, %xmm2 1209; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm3 1210; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm6 1211; AVX1-NEXT: vpcmpgtq %xmm3, %xmm6, %xmm3 1212; AVX1-NEXT: vpcmpgtq %xmm5, %xmm1, %xmm1 1213; AVX1-NEXT: vpackssdw %xmm3, %xmm1, %xmm1 1214; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm3 1215; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5 1216; AVX1-NEXT: vpcmpgtq %xmm3, %xmm5, %xmm3 1217; AVX1-NEXT: vpcmpgtq %xmm4, %xmm0, %xmm0 1218; AVX1-NEXT: vpackssdw %xmm3, %xmm0, %xmm0 1219; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 1220; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0 1221; AVX1-NEXT: vzeroupper 1222; AVX1-NEXT: retq 1223; 1224; AVX2-LABEL: test_cmp_v16i64: 1225; AVX2: # %bb.0: 1226; AVX2-NEXT: vpcmpgtq %ymm7, %ymm3, %ymm3 1227; 
AVX2-NEXT: vpcmpgtq %ymm6, %ymm2, %ymm2 1228; AVX2-NEXT: vpackssdw %ymm3, %ymm2, %ymm2 1229; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,1,3] 1230; AVX2-NEXT: vpcmpgtq %ymm5, %ymm1, %ymm1 1231; AVX2-NEXT: vpcmpgtq %ymm4, %ymm0, %ymm0 1232; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 1233; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 1234; AVX2-NEXT: vpackssdw %ymm2, %ymm0, %ymm0 1235; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1236; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 1237; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] 1238; AVX2-NEXT: vzeroupper 1239; AVX2-NEXT: retq 1240; 1241; AVX512F-LABEL: test_cmp_v16i64: 1242; AVX512F: # %bb.0: 1243; AVX512F-NEXT: vpcmpgtq %zmm2, %zmm0, %k0 1244; AVX512F-NEXT: vpcmpgtq %zmm3, %zmm1, %k1 1245; AVX512F-NEXT: kunpckbw %k0, %k1, %k1 1246; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1247; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 1248; AVX512F-NEXT: vzeroupper 1249; AVX512F-NEXT: retq 1250; 1251; AVX512DQ-LABEL: test_cmp_v16i64: 1252; AVX512DQ: # %bb.0: 1253; AVX512DQ-NEXT: vpcmpgtq %zmm2, %zmm0, %k0 1254; AVX512DQ-NEXT: vpcmpgtq %zmm3, %zmm1, %k1 1255; AVX512DQ-NEXT: kunpckbw %k0, %k1, %k0 1256; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 1257; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 1258; AVX512DQ-NEXT: vzeroupper 1259; AVX512DQ-NEXT: retq 1260; 1261; AVX512BW-LABEL: test_cmp_v16i64: 1262; AVX512BW: # %bb.0: 1263; AVX512BW-NEXT: vpcmpgtq %zmm2, %zmm0, %k0 1264; AVX512BW-NEXT: vpcmpgtq %zmm3, %zmm1, %k1 1265; AVX512BW-NEXT: kunpckbw %k0, %k1, %k0 1266; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 1267; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 1268; AVX512BW-NEXT: vzeroupper 1269; AVX512BW-NEXT: retq 1270 %1 = icmp sgt <16 x i64> %a0, %a1 1271 ret <16 x i1> %1 1272} 1273 1274define <32 x i1> @test_cmp_v32i32(<32 x i32> %a0, <32 x i32> %a1) nounwind { 1275; SSE-LABEL: test_cmp_v32i32: 1276; SSE: # %bb.0: 1277; SSE-NEXT: movq %rdi, %rax 1278; SSE-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm3 1279; SSE-NEXT: pcmpgtd 
{{[0-9]+}}(%rsp), %xmm2 1280; SSE-NEXT: packssdw %xmm3, %xmm2 1281; SSE-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm1 1282; SSE-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm0 1283; SSE-NEXT: packssdw %xmm1, %xmm0 1284; SSE-NEXT: packsswb %xmm2, %xmm0 1285; SSE-NEXT: pmovmskb %xmm0, %ecx 1286; SSE-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm7 1287; SSE-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm6 1288; SSE-NEXT: packssdw %xmm7, %xmm6 1289; SSE-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm5 1290; SSE-NEXT: pcmpgtd {{[0-9]+}}(%rsp), %xmm4 1291; SSE-NEXT: packssdw %xmm5, %xmm4 1292; SSE-NEXT: packsswb %xmm6, %xmm4 1293; SSE-NEXT: pmovmskb %xmm4, %edx 1294; SSE-NEXT: shll $16, %edx 1295; SSE-NEXT: orl %ecx, %edx 1296; SSE-NEXT: movl %edx, (%rdi) 1297; SSE-NEXT: retq 1298; 1299; AVX1-LABEL: test_cmp_v32i32: 1300; AVX1: # %bb.0: 1301; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm8 1302; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm9 1303; AVX1-NEXT: vpcmpgtd %xmm8, %xmm9, %xmm8 1304; AVX1-NEXT: vpcmpgtd %xmm7, %xmm3, %xmm3 1305; AVX1-NEXT: vpackssdw %xmm8, %xmm3, %xmm3 1306; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm7 1307; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm8 1308; AVX1-NEXT: vpcmpgtd %xmm7, %xmm8, %xmm7 1309; AVX1-NEXT: vpcmpgtd %xmm6, %xmm2, %xmm2 1310; AVX1-NEXT: vpackssdw %xmm7, %xmm2, %xmm2 1311; AVX1-NEXT: vpacksswb %xmm3, %xmm2, %xmm2 1312; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm3 1313; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm6 1314; AVX1-NEXT: vpcmpgtd %xmm3, %xmm6, %xmm3 1315; AVX1-NEXT: vpcmpgtd %xmm5, %xmm1, %xmm1 1316; AVX1-NEXT: vpackssdw %xmm3, %xmm1, %xmm1 1317; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm3 1318; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5 1319; AVX1-NEXT: vpcmpgtd %xmm3, %xmm5, %xmm3 1320; AVX1-NEXT: vpcmpgtd %xmm4, %xmm0, %xmm0 1321; AVX1-NEXT: vpackssdw %xmm3, %xmm0, %xmm0 1322; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 1323; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1324; AVX1-NEXT: retq 1325; 1326; AVX2-LABEL: test_cmp_v32i32: 1327; AVX2: # %bb.0: 1328; AVX2-NEXT: vpcmpgtd %ymm7, %ymm3, 
%ymm3 1329; AVX2-NEXT: vpcmpgtd %ymm6, %ymm2, %ymm2 1330; AVX2-NEXT: vpackssdw %ymm3, %ymm2, %ymm2 1331; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,1,3] 1332; AVX2-NEXT: vpcmpgtd %ymm5, %ymm1, %ymm1 1333; AVX2-NEXT: vpcmpgtd %ymm4, %ymm0, %ymm0 1334; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 1335; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 1336; AVX2-NEXT: vpacksswb %ymm2, %ymm0, %ymm0 1337; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 1338; AVX2-NEXT: retq 1339; 1340; AVX512F-LABEL: test_cmp_v32i32: 1341; AVX512F: # %bb.0: 1342; AVX512F-NEXT: vpcmpgtd %zmm3, %zmm1, %k1 1343; AVX512F-NEXT: vpcmpgtd %zmm2, %zmm0, %k2 1344; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z} 1345; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 1346; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} 1347; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 1348; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 1349; AVX512F-NEXT: retq 1350; 1351; AVX512DQ-LABEL: test_cmp_v32i32: 1352; AVX512DQ: # %bb.0: 1353; AVX512DQ-NEXT: vpcmpgtd %zmm3, %zmm1, %k0 1354; AVX512DQ-NEXT: vpcmpgtd %zmm2, %zmm0, %k1 1355; AVX512DQ-NEXT: vpmovm2d %k1, %zmm0 1356; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 1357; AVX512DQ-NEXT: vpmovm2d %k0, %zmm1 1358; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1 1359; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 1360; AVX512DQ-NEXT: retq 1361; 1362; AVX512BW-LABEL: test_cmp_v32i32: 1363; AVX512BW: # %bb.0: 1364; AVX512BW-NEXT: vpcmpgtd %zmm2, %zmm0, %k0 1365; AVX512BW-NEXT: vpcmpgtd %zmm3, %zmm1, %k1 1366; AVX512BW-NEXT: kunpckwd %k0, %k1, %k0 1367; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 1368; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1369; AVX512BW-NEXT: retq 1370 %1 = icmp sgt <32 x i32> %a0, %a1 1371 ret <32 x i1> %1 1372} 1373 1374define <64 x i1> @test_cmp_v64i16(<64 x i16> %a0, <64 x i16> %a1) nounwind { 1375; SSE-LABEL: test_cmp_v64i16: 1376; SSE: # %bb.0: 1377; SSE-NEXT: movq %rdi, %rax 1378; SSE-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm1 1379; SSE-NEXT: pcmpgtw 
{{[0-9]+}}(%rsp), %xmm0 1380; SSE-NEXT: packsswb %xmm1, %xmm0 1381; SSE-NEXT: pmovmskb %xmm0, %ecx 1382; SSE-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm3 1383; SSE-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm2 1384; SSE-NEXT: packsswb %xmm3, %xmm2 1385; SSE-NEXT: pmovmskb %xmm2, %edx 1386; SSE-NEXT: shll $16, %edx 1387; SSE-NEXT: orl %ecx, %edx 1388; SSE-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm5 1389; SSE-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm4 1390; SSE-NEXT: packsswb %xmm5, %xmm4 1391; SSE-NEXT: pmovmskb %xmm4, %ecx 1392; SSE-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm7 1393; SSE-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm6 1394; SSE-NEXT: packsswb %xmm7, %xmm6 1395; SSE-NEXT: pmovmskb %xmm6, %esi 1396; SSE-NEXT: shll $16, %esi 1397; SSE-NEXT: orl %ecx, %esi 1398; SSE-NEXT: shlq $32, %rsi 1399; SSE-NEXT: orq %rdx, %rsi 1400; SSE-NEXT: movq %rsi, (%rdi) 1401; SSE-NEXT: retq 1402; 1403; AVX1-LABEL: test_cmp_v64i16: 1404; AVX1: # %bb.0: 1405; AVX1-NEXT: movq %rdi, %rax 1406; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm8 1407; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm9 1408; AVX1-NEXT: vpcmpgtw %xmm8, %xmm9, %xmm8 1409; AVX1-NEXT: vpcmpgtw %xmm4, %xmm0, %xmm0 1410; AVX1-NEXT: vpacksswb %xmm8, %xmm0, %xmm0 1411; AVX1-NEXT: vpmovmskb %xmm0, %ecx 1412; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm0 1413; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4 1414; AVX1-NEXT: vpcmpgtw %xmm0, %xmm4, %xmm0 1415; AVX1-NEXT: vpcmpgtw %xmm5, %xmm1, %xmm1 1416; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm0 1417; AVX1-NEXT: vpmovmskb %xmm0, %edx 1418; AVX1-NEXT: shll $16, %edx 1419; AVX1-NEXT: orl %ecx, %edx 1420; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm0 1421; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm1 1422; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 1423; AVX1-NEXT: vpcmpgtw %xmm6, %xmm2, %xmm1 1424; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm0 1425; AVX1-NEXT: vpmovmskb %xmm0, %ecx 1426; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm0 1427; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm1 1428; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 1429; AVX1-NEXT: vpcmpgtw 
%xmm7, %xmm3, %xmm1 1430; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm0 1431; AVX1-NEXT: vpmovmskb %xmm0, %esi 1432; AVX1-NEXT: shll $16, %esi 1433; AVX1-NEXT: orl %ecx, %esi 1434; AVX1-NEXT: shlq $32, %rsi 1435; AVX1-NEXT: orq %rdx, %rsi 1436; AVX1-NEXT: movq %rsi, (%rdi) 1437; AVX1-NEXT: vzeroupper 1438; AVX1-NEXT: retq 1439; 1440; AVX2-LABEL: test_cmp_v64i16: 1441; AVX2: # %bb.0: 1442; AVX2-NEXT: movq %rdi, %rax 1443; AVX2-NEXT: vpcmpgtw %ymm5, %ymm1, %ymm1 1444; AVX2-NEXT: vpcmpgtw %ymm4, %ymm0, %ymm0 1445; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 1446; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 1447; AVX2-NEXT: vpmovmskb %ymm0, %ecx 1448; AVX2-NEXT: vpcmpgtw %ymm7, %ymm3, %ymm0 1449; AVX2-NEXT: vpcmpgtw %ymm6, %ymm2, %ymm1 1450; AVX2-NEXT: vpacksswb %ymm0, %ymm1, %ymm0 1451; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 1452; AVX2-NEXT: vpmovmskb %ymm0, %edx 1453; AVX2-NEXT: shlq $32, %rdx 1454; AVX2-NEXT: orq %rcx, %rdx 1455; AVX2-NEXT: movq %rdx, (%rdi) 1456; AVX2-NEXT: vzeroupper 1457; AVX2-NEXT: retq 1458; 1459; AVX512F-LABEL: test_cmp_v64i16: 1460; AVX512F: # %bb.0: 1461; AVX512F-NEXT: movq %rdi, %rax 1462; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm4 1463; AVX512F-NEXT: vpmovsxwd %ymm4, %zmm4 1464; AVX512F-NEXT: vptestmd %zmm4, %zmm4, %k0 1465; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm2 1466; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 1467; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0 1468; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0 1469; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 1470; AVX512F-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm0 1471; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0 1472; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k2 1473; AVX512F-NEXT: vextracti64x4 $1, %zmm3, %ymm0 1474; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1 1475; AVX512F-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 1476; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0 1477; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k3 1478; AVX512F-NEXT: kmovw %k3, 6(%rdi) 1479; AVX512F-NEXT: kmovw %k2, 4(%rdi) 1480; AVX512F-NEXT: kmovw %k1, 
2(%rdi) 1481; AVX512F-NEXT: kmovw %k0, (%rdi) 1482; AVX512F-NEXT: vzeroupper 1483; AVX512F-NEXT: retq 1484; 1485; AVX512DQ-LABEL: test_cmp_v64i16: 1486; AVX512DQ: # %bb.0: 1487; AVX512DQ-NEXT: movq %rdi, %rax 1488; AVX512DQ-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm4 1489; AVX512DQ-NEXT: vpmovsxwd %ymm4, %zmm4 1490; AVX512DQ-NEXT: vpmovd2m %zmm4, %k0 1491; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm2 1492; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm0 1493; AVX512DQ-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0 1494; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0 1495; AVX512DQ-NEXT: vpmovd2m %zmm0, %k1 1496; AVX512DQ-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm0 1497; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0 1498; AVX512DQ-NEXT: vpmovd2m %zmm0, %k2 1499; AVX512DQ-NEXT: vextracti64x4 $1, %zmm3, %ymm0 1500; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm1 1501; AVX512DQ-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 1502; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0 1503; AVX512DQ-NEXT: vpmovd2m %zmm0, %k3 1504; AVX512DQ-NEXT: kmovw %k3, 6(%rdi) 1505; AVX512DQ-NEXT: kmovw %k2, 4(%rdi) 1506; AVX512DQ-NEXT: kmovw %k1, 2(%rdi) 1507; AVX512DQ-NEXT: kmovw %k0, (%rdi) 1508; AVX512DQ-NEXT: vzeroupper 1509; AVX512DQ-NEXT: retq 1510; 1511; AVX512BW-LABEL: test_cmp_v64i16: 1512; AVX512BW: # %bb.0: 1513; AVX512BW-NEXT: vpcmpgtw %zmm2, %zmm0, %k0 1514; AVX512BW-NEXT: vpcmpgtw %zmm3, %zmm1, %k1 1515; AVX512BW-NEXT: kunpckdq %k0, %k1, %k0 1516; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 1517; AVX512BW-NEXT: retq 1518 %1 = icmp sgt <64 x i16> %a0, %a1 1519 ret <64 x i1> %1 1520} 1521 1522define <128 x i1> @test_cmp_v128i8(<128 x i8> %a0, <128 x i8> %a1) nounwind { 1523; SSE-LABEL: test_cmp_v128i8: 1524; SSE: # %bb.0: 1525; SSE-NEXT: movq %rdi, %rax 1526; SSE-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm0 1527; SSE-NEXT: pmovmskb %xmm0, %ecx 1528; SSE-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm1 1529; SSE-NEXT: pmovmskb %xmm1, %edx 1530; SSE-NEXT: shll $16, %edx 1531; SSE-NEXT: orl %ecx, %edx 1532; SSE-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm2 1533; SSE-NEXT: pmovmskb 
%xmm2, %esi 1534; SSE-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm3 1535; SSE-NEXT: pmovmskb %xmm3, %ecx 1536; SSE-NEXT: shll $16, %ecx 1537; SSE-NEXT: orl %esi, %ecx 1538; SSE-NEXT: shlq $32, %rcx 1539; SSE-NEXT: orq %rdx, %rcx 1540; SSE-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm4 1541; SSE-NEXT: pmovmskb %xmm4, %edx 1542; SSE-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm5 1543; SSE-NEXT: pmovmskb %xmm5, %esi 1544; SSE-NEXT: shll $16, %esi 1545; SSE-NEXT: orl %edx, %esi 1546; SSE-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm6 1547; SSE-NEXT: pmovmskb %xmm6, %edx 1548; SSE-NEXT: pcmpgtb {{[0-9]+}}(%rsp), %xmm7 1549; SSE-NEXT: pmovmskb %xmm7, %edi 1550; SSE-NEXT: shll $16, %edi 1551; SSE-NEXT: orl %edx, %edi 1552; SSE-NEXT: shlq $32, %rdi 1553; SSE-NEXT: orq %rsi, %rdi 1554; SSE-NEXT: movq %rdi, 8(%rax) 1555; SSE-NEXT: movq %rcx, (%rax) 1556; SSE-NEXT: retq 1557; 1558; AVX1-LABEL: test_cmp_v128i8: 1559; AVX1: # %bb.0: 1560; AVX1-NEXT: movq %rdi, %rax 1561; AVX1-NEXT: vpcmpgtb %xmm4, %xmm0, %xmm8 1562; AVX1-NEXT: vpmovmskb %xmm8, %ecx 1563; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm4 1564; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1565; AVX1-NEXT: vpcmpgtb %xmm4, %xmm0, %xmm0 1566; AVX1-NEXT: vpmovmskb %xmm0, %edx 1567; AVX1-NEXT: shll $16, %edx 1568; AVX1-NEXT: orl %ecx, %edx 1569; AVX1-NEXT: vpcmpgtb %xmm5, %xmm1, %xmm0 1570; AVX1-NEXT: vpmovmskb %xmm0, %esi 1571; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm0 1572; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 1573; AVX1-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 1574; AVX1-NEXT: vpmovmskb %xmm0, %ecx 1575; AVX1-NEXT: shll $16, %ecx 1576; AVX1-NEXT: orl %esi, %ecx 1577; AVX1-NEXT: shlq $32, %rcx 1578; AVX1-NEXT: orq %rdx, %rcx 1579; AVX1-NEXT: vpcmpgtb %xmm6, %xmm2, %xmm0 1580; AVX1-NEXT: vpmovmskb %xmm0, %edx 1581; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm0 1582; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm1 1583; AVX1-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 1584; AVX1-NEXT: vpmovmskb %xmm0, %esi 1585; AVX1-NEXT: shll $16, %esi 1586; AVX1-NEXT: orl %edx, %esi 1587; AVX1-NEXT: 
vpcmpgtb %xmm7, %xmm3, %xmm0 1588; AVX1-NEXT: vpmovmskb %xmm0, %edx 1589; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm0 1590; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm1 1591; AVX1-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 1592; AVX1-NEXT: vpmovmskb %xmm0, %edi 1593; AVX1-NEXT: shll $16, %edi 1594; AVX1-NEXT: orl %edx, %edi 1595; AVX1-NEXT: shlq $32, %rdi 1596; AVX1-NEXT: orq %rsi, %rdi 1597; AVX1-NEXT: movq %rdi, 8(%rax) 1598; AVX1-NEXT: movq %rcx, (%rax) 1599; AVX1-NEXT: vzeroupper 1600; AVX1-NEXT: retq 1601; 1602; AVX2-LABEL: test_cmp_v128i8: 1603; AVX2: # %bb.0: 1604; AVX2-NEXT: movq %rdi, %rax 1605; AVX2-NEXT: vpcmpgtb %ymm4, %ymm0, %ymm0 1606; AVX2-NEXT: vpmovmskb %ymm0, %ecx 1607; AVX2-NEXT: vpcmpgtb %ymm5, %ymm1, %ymm0 1608; AVX2-NEXT: vpmovmskb %ymm0, %edx 1609; AVX2-NEXT: shlq $32, %rdx 1610; AVX2-NEXT: orq %rcx, %rdx 1611; AVX2-NEXT: vpcmpgtb %ymm6, %ymm2, %ymm0 1612; AVX2-NEXT: vpmovmskb %ymm0, %ecx 1613; AVX2-NEXT: vpcmpgtb %ymm7, %ymm3, %ymm0 1614; AVX2-NEXT: vpmovmskb %ymm0, %esi 1615; AVX2-NEXT: shlq $32, %rsi 1616; AVX2-NEXT: orq %rcx, %rsi 1617; AVX2-NEXT: movq %rsi, 8(%rdi) 1618; AVX2-NEXT: movq %rdx, (%rdi) 1619; AVX2-NEXT: vzeroupper 1620; AVX2-NEXT: retq 1621; 1622; AVX512F-LABEL: test_cmp_v128i8: 1623; AVX512F: # %bb.0: 1624; AVX512F-NEXT: movq %rdi, %rax 1625; AVX512F-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm4 1626; AVX512F-NEXT: vpmovsxbd %xmm4, %zmm5 1627; AVX512F-NEXT: vptestmd %zmm5, %zmm5, %k0 1628; AVX512F-NEXT: vextracti128 $1, %ymm4, %xmm4 1629; AVX512F-NEXT: vpmovsxbd %xmm4, %zmm4 1630; AVX512F-NEXT: vptestmd %zmm4, %zmm4, %k1 1631; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm2 1632; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 1633; AVX512F-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0 1634; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm2 1635; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k2 1636; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 1637; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 1638; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k3 1639; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm0 1640; 
AVX512F-NEXT: vpmovsxbd %xmm0, %zmm2 1641; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k4 1642; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 1643; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 1644; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k5 1645; AVX512F-NEXT: vextracti64x4 $1, %zmm3, %ymm0 1646; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1 1647; AVX512F-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 1648; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm1 1649; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k6 1650; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 1651; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 1652; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k7 1653; AVX512F-NEXT: kmovw %k7, 14(%rdi) 1654; AVX512F-NEXT: kmovw %k6, 12(%rdi) 1655; AVX512F-NEXT: kmovw %k5, 10(%rdi) 1656; AVX512F-NEXT: kmovw %k4, 8(%rdi) 1657; AVX512F-NEXT: kmovw %k3, 6(%rdi) 1658; AVX512F-NEXT: kmovw %k2, 4(%rdi) 1659; AVX512F-NEXT: kmovw %k1, 2(%rdi) 1660; AVX512F-NEXT: kmovw %k0, (%rdi) 1661; AVX512F-NEXT: vzeroupper 1662; AVX512F-NEXT: retq 1663; 1664; AVX512DQ-LABEL: test_cmp_v128i8: 1665; AVX512DQ: # %bb.0: 1666; AVX512DQ-NEXT: movq %rdi, %rax 1667; AVX512DQ-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm4 1668; AVX512DQ-NEXT: vpmovsxbd %xmm4, %zmm5 1669; AVX512DQ-NEXT: vpmovd2m %zmm5, %k0 1670; AVX512DQ-NEXT: vextracti128 $1, %ymm4, %xmm4 1671; AVX512DQ-NEXT: vpmovsxbd %xmm4, %zmm4 1672; AVX512DQ-NEXT: vpmovd2m %zmm4, %k1 1673; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm2 1674; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm0 1675; AVX512DQ-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0 1676; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm2 1677; AVX512DQ-NEXT: vpmovd2m %zmm2, %k2 1678; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm0 1679; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 1680; AVX512DQ-NEXT: vpmovd2m %zmm0, %k3 1681; AVX512DQ-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm0 1682; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm2 1683; AVX512DQ-NEXT: vpmovd2m %zmm2, %k4 1684; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm0 1685; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 1686; AVX512DQ-NEXT: vpmovd2m %zmm0, %k5 
1687; AVX512DQ-NEXT: vextracti64x4 $1, %zmm3, %ymm0 1688; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm1 1689; AVX512DQ-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 1690; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm1 1691; AVX512DQ-NEXT: vpmovd2m %zmm1, %k6 1692; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm0 1693; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 1694; AVX512DQ-NEXT: vpmovd2m %zmm0, %k7 1695; AVX512DQ-NEXT: kmovw %k7, 14(%rdi) 1696; AVX512DQ-NEXT: kmovw %k6, 12(%rdi) 1697; AVX512DQ-NEXT: kmovw %k5, 10(%rdi) 1698; AVX512DQ-NEXT: kmovw %k4, 8(%rdi) 1699; AVX512DQ-NEXT: kmovw %k3, 6(%rdi) 1700; AVX512DQ-NEXT: kmovw %k2, 4(%rdi) 1701; AVX512DQ-NEXT: kmovw %k1, 2(%rdi) 1702; AVX512DQ-NEXT: kmovw %k0, (%rdi) 1703; AVX512DQ-NEXT: vzeroupper 1704; AVX512DQ-NEXT: retq 1705; 1706; AVX512BW-LABEL: test_cmp_v128i8: 1707; AVX512BW: # %bb.0: 1708; AVX512BW-NEXT: movq %rdi, %rax 1709; AVX512BW-NEXT: vpcmpgtb %zmm2, %zmm0, %k0 1710; AVX512BW-NEXT: vpcmpgtb %zmm3, %zmm1, %k1 1711; AVX512BW-NEXT: kmovq %k1, 8(%rdi) 1712; AVX512BW-NEXT: kmovq %k0, (%rdi) 1713; AVX512BW-NEXT: vzeroupper 1714; AVX512BW-NEXT: retq 1715 %1 = icmp sgt <128 x i8> %a0, %a1 1716 ret <128 x i1> %1 1717} 1718 1719; 1720; 2048-bit vector comparisons 1721; 1722 1723define <32 x i1> @test_cmp_v32f64(<32 x double> %a0, <32 x double> %a1) nounwind { 1724; SSE-LABEL: test_cmp_v32f64: 1725; SSE: # %bb.0: 1726; SSE-NEXT: movq %rdi, %rax 1727; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm8 1728; SSE-NEXT: cmpltpd %xmm7, %xmm8 1729; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm7 1730; SSE-NEXT: cmpltpd %xmm6, %xmm7 1731; SSE-NEXT: packssdw %xmm8, %xmm7 1732; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm6 1733; SSE-NEXT: cmpltpd %xmm5, %xmm6 1734; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm5 1735; SSE-NEXT: cmpltpd %xmm4, %xmm5 1736; SSE-NEXT: packssdw %xmm6, %xmm5 1737; SSE-NEXT: packssdw %xmm7, %xmm5 1738; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm4 1739; SSE-NEXT: cmpltpd %xmm3, %xmm4 1740; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm3 1741; SSE-NEXT: cmpltpd 
%xmm2, %xmm3 1742; SSE-NEXT: packssdw %xmm4, %xmm3 1743; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm2 1744; SSE-NEXT: cmpltpd %xmm1, %xmm2 1745; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm4 1746; SSE-NEXT: cmpltpd %xmm0, %xmm4 1747; SSE-NEXT: packssdw %xmm2, %xmm4 1748; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm0 1749; SSE-NEXT: packssdw %xmm3, %xmm4 1750; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm1 1751; SSE-NEXT: packsswb %xmm5, %xmm4 1752; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm2 1753; SSE-NEXT: pmovmskb %xmm4, %ecx 1754; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm3 1755; SSE-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm3 1756; SSE-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm2 1757; SSE-NEXT: packssdw %xmm3, %xmm2 1758; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm3 1759; SSE-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm3 1760; SSE-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm1 1761; SSE-NEXT: packssdw %xmm3, %xmm1 1762; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm3 1763; SSE-NEXT: packssdw %xmm2, %xmm1 1764; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm2 1765; SSE-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm2 1766; SSE-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm3 1767; SSE-NEXT: packssdw %xmm2, %xmm3 1768; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm2 1769; SSE-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm2 1770; SSE-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm0 1771; SSE-NEXT: packssdw %xmm2, %xmm0 1772; SSE-NEXT: packssdw %xmm3, %xmm0 1773; SSE-NEXT: packsswb %xmm1, %xmm0 1774; SSE-NEXT: pmovmskb %xmm0, %edx 1775; SSE-NEXT: shll $16, %edx 1776; SSE-NEXT: orl %ecx, %edx 1777; SSE-NEXT: movl %edx, (%rdi) 1778; SSE-NEXT: retq 1779; 1780; AVX1-LABEL: test_cmp_v32f64: 1781; AVX1: # %bb.0: 1782; AVX1-NEXT: pushq %rbp 1783; AVX1-NEXT: movq %rsp, %rbp 1784; AVX1-NEXT: andq $-32, %rsp 1785; AVX1-NEXT: subq $32, %rsp 1786; AVX1-NEXT: vmovapd 16(%rbp), %ymm8 1787; AVX1-NEXT: vmovapd 48(%rbp), %ymm9 1788; AVX1-NEXT: vmovapd 80(%rbp), %ymm10 1789; AVX1-NEXT: vmovapd 112(%rbp), %ymm11 1790; AVX1-NEXT: vmovapd 144(%rbp), %ymm12 1791; AVX1-NEXT: vmovapd 176(%rbp), %ymm13 1792; 
AVX1-NEXT: vmovapd 208(%rbp), %ymm14 1793; AVX1-NEXT: vmovapd 240(%rbp), %ymm15 1794; AVX1-NEXT: vcmpltpd %ymm7, %ymm15, %ymm7 1795; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm15 1796; AVX1-NEXT: vpackssdw %xmm15, %xmm7, %xmm7 1797; AVX1-NEXT: vcmpltpd %ymm6, %ymm14, %ymm6 1798; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm14 1799; AVX1-NEXT: vpackssdw %xmm14, %xmm6, %xmm6 1800; AVX1-NEXT: vpackssdw %xmm7, %xmm6, %xmm6 1801; AVX1-NEXT: vcmpltpd %ymm5, %ymm13, %ymm5 1802; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm7 1803; AVX1-NEXT: vpackssdw %xmm7, %xmm5, %xmm5 1804; AVX1-NEXT: vcmpltpd %ymm4, %ymm12, %ymm4 1805; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm7 1806; AVX1-NEXT: vpackssdw %xmm7, %xmm4, %xmm4 1807; AVX1-NEXT: vpackssdw %xmm5, %xmm4, %xmm4 1808; AVX1-NEXT: vpacksswb %xmm6, %xmm4, %xmm4 1809; AVX1-NEXT: vcmpltpd %ymm3, %ymm11, %ymm3 1810; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm5 1811; AVX1-NEXT: vpackssdw %xmm5, %xmm3, %xmm3 1812; AVX1-NEXT: vcmpltpd %ymm2, %ymm10, %ymm2 1813; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5 1814; AVX1-NEXT: vpackssdw %xmm5, %xmm2, %xmm2 1815; AVX1-NEXT: vpackssdw %xmm3, %xmm2, %xmm2 1816; AVX1-NEXT: vcmpltpd %ymm1, %ymm9, %ymm1 1817; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 1818; AVX1-NEXT: vpackssdw %xmm3, %xmm1, %xmm1 1819; AVX1-NEXT: vcmpltpd %ymm0, %ymm8, %ymm0 1820; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1821; AVX1-NEXT: vpackssdw %xmm3, %xmm0, %xmm0 1822; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 1823; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0 1824; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0 1825; AVX1-NEXT: movq %rbp, %rsp 1826; AVX1-NEXT: popq %rbp 1827; AVX1-NEXT: retq 1828; 1829; AVX2-LABEL: test_cmp_v32f64: 1830; AVX2: # %bb.0: 1831; AVX2-NEXT: pushq %rbp 1832; AVX2-NEXT: movq %rsp, %rbp 1833; AVX2-NEXT: andq $-32, %rsp 1834; AVX2-NEXT: subq $32, %rsp 1835; AVX2-NEXT: vmovapd 16(%rbp), %ymm8 1836; AVX2-NEXT: vmovapd 48(%rbp), %ymm9 1837; AVX2-NEXT: vmovapd 80(%rbp), %ymm10 1838; AVX2-NEXT: vmovapd 112(%rbp), %ymm11 1839; AVX2-NEXT: 
vmovapd 144(%rbp), %ymm12 1840; AVX2-NEXT: vmovapd 176(%rbp), %ymm13 1841; AVX2-NEXT: vmovapd 208(%rbp), %ymm14 1842; AVX2-NEXT: vmovapd 240(%rbp), %ymm15 1843; AVX2-NEXT: vcmpltpd %ymm7, %ymm15, %ymm7 1844; AVX2-NEXT: vcmpltpd %ymm6, %ymm14, %ymm6 1845; AVX2-NEXT: vpackssdw %ymm7, %ymm6, %ymm6 1846; AVX2-NEXT: vpermq {{.*#+}} ymm6 = ymm6[0,2,1,3] 1847; AVX2-NEXT: vcmpltpd %ymm5, %ymm13, %ymm5 1848; AVX2-NEXT: vcmpltpd %ymm4, %ymm12, %ymm4 1849; AVX2-NEXT: vpackssdw %ymm5, %ymm4, %ymm4 1850; AVX2-NEXT: vpermq {{.*#+}} ymm4 = ymm4[0,2,1,3] 1851; AVX2-NEXT: vpackssdw %ymm6, %ymm4, %ymm4 1852; AVX2-NEXT: vpermq {{.*#+}} ymm4 = ymm4[0,2,1,3] 1853; AVX2-NEXT: vcmpltpd %ymm3, %ymm11, %ymm3 1854; AVX2-NEXT: vcmpltpd %ymm2, %ymm10, %ymm2 1855; AVX2-NEXT: vpackssdw %ymm3, %ymm2, %ymm2 1856; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,1,3] 1857; AVX2-NEXT: vcmpltpd %ymm1, %ymm9, %ymm1 1858; AVX2-NEXT: vcmpltpd %ymm0, %ymm8, %ymm0 1859; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 1860; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 1861; AVX2-NEXT: vpackssdw %ymm2, %ymm0, %ymm0 1862; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 1863; AVX2-NEXT: vpacksswb %ymm4, %ymm0, %ymm0 1864; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 1865; AVX2-NEXT: movq %rbp, %rsp 1866; AVX2-NEXT: popq %rbp 1867; AVX2-NEXT: retq 1868; 1869; AVX512F-LABEL: test_cmp_v32f64: 1870; AVX512F: # %bb.0: 1871; AVX512F-NEXT: vcmpltpd %zmm2, %zmm6, %k0 1872; AVX512F-NEXT: vcmpltpd %zmm3, %zmm7, %k1 1873; AVX512F-NEXT: kunpckbw %k0, %k1, %k1 1874; AVX512F-NEXT: vcmpltpd %zmm0, %zmm4, %k0 1875; AVX512F-NEXT: vcmpltpd %zmm1, %zmm5, %k2 1876; AVX512F-NEXT: kunpckbw %k0, %k2, %k2 1877; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z} 1878; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 1879; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} 1880; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 1881; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 1882; AVX512F-NEXT: retq 1883; 1884; AVX512DQ-LABEL: 
test_cmp_v32f64: 1885; AVX512DQ: # %bb.0: 1886; AVX512DQ-NEXT: vcmpltpd %zmm2, %zmm6, %k0 1887; AVX512DQ-NEXT: vcmpltpd %zmm3, %zmm7, %k1 1888; AVX512DQ-NEXT: kunpckbw %k0, %k1, %k0 1889; AVX512DQ-NEXT: vcmpltpd %zmm0, %zmm4, %k1 1890; AVX512DQ-NEXT: vcmpltpd %zmm1, %zmm5, %k2 1891; AVX512DQ-NEXT: kunpckbw %k1, %k2, %k1 1892; AVX512DQ-NEXT: vpmovm2d %k1, %zmm0 1893; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 1894; AVX512DQ-NEXT: vpmovm2d %k0, %zmm1 1895; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1 1896; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 1897; AVX512DQ-NEXT: retq 1898; 1899; AVX512BW-LABEL: test_cmp_v32f64: 1900; AVX512BW: # %bb.0: 1901; AVX512BW-NEXT: vcmpltpd %zmm0, %zmm4, %k0 1902; AVX512BW-NEXT: vcmpltpd %zmm1, %zmm5, %k1 1903; AVX512BW-NEXT: kunpckbw %k0, %k1, %k0 1904; AVX512BW-NEXT: vcmpltpd %zmm2, %zmm6, %k1 1905; AVX512BW-NEXT: vcmpltpd %zmm3, %zmm7, %k2 1906; AVX512BW-NEXT: kunpckbw %k1, %k2, %k1 1907; AVX512BW-NEXT: kunpckwd %k0, %k1, %k0 1908; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 1909; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1910; AVX512BW-NEXT: retq 1911 %1 = fcmp ogt <32 x double> %a0, %a1 1912 ret <32 x i1> %1 1913} 1914
;
; test_cmp_v32i64 - signed greater-than compare (icmp sgt) of two <32 x i64>
; arguments, returning the <32 x i1> result.
;
; The check lines in this file are autogenerated by
; utils/update_llc_test_checks.py (see the note on the first line of the file),
; so regenerate them with that script instead of editing them by hand.
;
; What the expected sequences below show, per check prefix
;  - SSE2 has no pcmpgtq in its expected output; every 128-bit compare is built
;    from a pxor with the [2147483648,2147483648] constant followed by
;    pcmpgtd / pcmpeqd / pshufd / pand / por.
;  - SSE42 uses pcmpgtq directly, with one operand read from the stack.
;  - Both SSE variants narrow the results with packssdw / packsswb, take two
;    masks with pmovmskb, merge them (shll $16 then orl) and store the value
;    with movl through (%rdi); %rdi is also copied to %rax at function entry.
;  - AVX1 compares 128-bit halves (vextractf128 + vpcmpgtq) and AVX2 compares
;    whole 256-bit vectors; both leave the packed result in %ymm0.
;  - AVX512F, AVX512DQ and AVX512BW compare into %k mask registers, combine
;    them with kunpckbw (plus kunpckwd for AVX512BW) and expand into %ymm0.
;
1915define <32 x i1> @test_cmp_v32i64(<32 x i64> %a0, <32 x i64> %a1) nounwind { 1916; SSE2-LABEL: test_cmp_v32i64: 1917; SSE2: # %bb.0: 1918; SSE2-NEXT: movq %rdi, %rax 1919; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [2147483648,2147483648] 1920; SSE2-NEXT: pxor %xmm8, %xmm7 1921; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9 1922; SSE2-NEXT: pxor %xmm8, %xmm9 1923; SSE2-NEXT: movdqa %xmm7, %xmm10 1924; SSE2-NEXT: pcmpgtd %xmm9, %xmm10 1925; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm10[0,0,2,2] 1926; SSE2-NEXT: pcmpeqd %xmm7, %xmm9 1927; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm9[1,1,3,3] 1928; SSE2-NEXT: pand %xmm11, %xmm7 1929; SSE2-NEXT: pshufd {{.*#+}} xmm9 = xmm10[1,1,3,3] 1930; SSE2-NEXT: por %xmm7, %xmm9 1931; SSE2-NEXT: pxor %xmm8, %xmm6 1932; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm7 1933; SSE2-NEXT: pxor %xmm8, %xmm7 1934;
SSE2-NEXT: movdqa %xmm6, %xmm10 1935; SSE2-NEXT: pcmpgtd %xmm7, %xmm10 1936; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm10[0,0,2,2] 1937; SSE2-NEXT: pcmpeqd %xmm6, %xmm7 1938; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm7[1,1,3,3] 1939; SSE2-NEXT: pand %xmm11, %xmm7 1940; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm10[1,1,3,3] 1941; SSE2-NEXT: por %xmm7, %xmm6 1942; SSE2-NEXT: packssdw %xmm9, %xmm6 1943; SSE2-NEXT: pxor %xmm8, %xmm5 1944; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm7 1945; SSE2-NEXT: pxor %xmm8, %xmm7 1946; SSE2-NEXT: movdqa %xmm5, %xmm9 1947; SSE2-NEXT: pcmpgtd %xmm7, %xmm9 1948; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm9[0,0,2,2] 1949; SSE2-NEXT: pcmpeqd %xmm5, %xmm7 1950; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3] 1951; SSE2-NEXT: pand %xmm10, %xmm5 1952; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm9[1,1,3,3] 1953; SSE2-NEXT: por %xmm5, %xmm7 1954; SSE2-NEXT: pxor %xmm8, %xmm4 1955; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm5 1956; SSE2-NEXT: pxor %xmm8, %xmm5 1957; SSE2-NEXT: movdqa %xmm4, %xmm9 1958; SSE2-NEXT: pcmpgtd %xmm5, %xmm9 1959; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm9[0,0,2,2] 1960; SSE2-NEXT: pcmpeqd %xmm4, %xmm5 1961; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3] 1962; SSE2-NEXT: pand %xmm10, %xmm5 1963; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm9[1,1,3,3] 1964; SSE2-NEXT: por %xmm5, %xmm4 1965; SSE2-NEXT: packssdw %xmm7, %xmm4 1966; SSE2-NEXT: packssdw %xmm6, %xmm4 1967; SSE2-NEXT: pxor %xmm8, %xmm3 1968; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm5 1969; SSE2-NEXT: pxor %xmm8, %xmm5 1970; SSE2-NEXT: movdqa %xmm3, %xmm6 1971; SSE2-NEXT: pcmpgtd %xmm5, %xmm6 1972; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] 1973; SSE2-NEXT: pcmpeqd %xmm3, %xmm5 1974; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm5[1,1,3,3] 1975; SSE2-NEXT: pand %xmm7, %xmm3 1976; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3] 1977; SSE2-NEXT: por %xmm3, %xmm5 1978; SSE2-NEXT: pxor %xmm8, %xmm2 1979; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm3 1980; SSE2-NEXT: pxor %xmm8, %xmm3 1981; SSE2-NEXT: movdqa %xmm2,
%xmm6 1982; SSE2-NEXT: pcmpgtd %xmm3, %xmm6 1983; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] 1984; SSE2-NEXT: pcmpeqd %xmm2, %xmm3 1985; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 1986; SSE2-NEXT: pand %xmm7, %xmm3 1987; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3] 1988; SSE2-NEXT: por %xmm3, %xmm2 1989; SSE2-NEXT: packssdw %xmm5, %xmm2 1990; SSE2-NEXT: pxor %xmm8, %xmm1 1991; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm3 1992; SSE2-NEXT: pxor %xmm8, %xmm3 1993; SSE2-NEXT: movdqa %xmm1, %xmm5 1994; SSE2-NEXT: pcmpgtd %xmm3, %xmm5 1995; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2] 1996; SSE2-NEXT: pcmpeqd %xmm1, %xmm3 1997; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3] 1998; SSE2-NEXT: pand %xmm6, %xmm1 1999; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm5[1,1,3,3] 2000; SSE2-NEXT: por %xmm1, %xmm3 2001; SSE2-NEXT: pxor %xmm8, %xmm0 2002; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm1 2003; SSE2-NEXT: pxor %xmm8, %xmm1 2004; SSE2-NEXT: movdqa %xmm0, %xmm5 2005; SSE2-NEXT: pcmpgtd %xmm1, %xmm5 2006; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2] 2007; SSE2-NEXT: pcmpeqd %xmm0, %xmm1 2008; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3] 2009; SSE2-NEXT: pand %xmm6, %xmm0 2010; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm5[1,1,3,3] 2011; SSE2-NEXT: por %xmm0, %xmm1 2012; SSE2-NEXT: packssdw %xmm3, %xmm1 2013; SSE2-NEXT: packssdw %xmm2, %xmm1 2014; SSE2-NEXT: packsswb %xmm4, %xmm1 2015; SSE2-NEXT: pmovmskb %xmm1, %ecx 2016; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm0 2017; SSE2-NEXT: pxor %xmm8, %xmm0 2018; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm1 2019; SSE2-NEXT: pxor %xmm8, %xmm1 2020; SSE2-NEXT: movdqa %xmm1, %xmm2 2021; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 2022; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 2023; SSE2-NEXT: pcmpeqd %xmm0, %xmm1 2024; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3] 2025; SSE2-NEXT: pand %xmm3, %xmm0 2026; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 2027; SSE2-NEXT: por %xmm0, %xmm2 2028; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm0 2029;
SSE2-NEXT: pxor %xmm8, %xmm0 2030; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm1 2031; SSE2-NEXT: pxor %xmm8, %xmm1 2032; SSE2-NEXT: movdqa %xmm1, %xmm3 2033; SSE2-NEXT: pcmpgtd %xmm0, %xmm3 2034; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2] 2035; SSE2-NEXT: pcmpeqd %xmm0, %xmm1 2036; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3] 2037; SSE2-NEXT: pand %xmm4, %xmm0 2038; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3] 2039; SSE2-NEXT: por %xmm0, %xmm1 2040; SSE2-NEXT: packssdw %xmm2, %xmm1 2041; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm0 2042; SSE2-NEXT: pxor %xmm8, %xmm0 2043; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm2 2044; SSE2-NEXT: pxor %xmm8, %xmm2 2045; SSE2-NEXT: movdqa %xmm2, %xmm3 2046; SSE2-NEXT: pcmpgtd %xmm0, %xmm3 2047; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2] 2048; SSE2-NEXT: pcmpeqd %xmm0, %xmm2 2049; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 2050; SSE2-NEXT: pand %xmm4, %xmm0 2051; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3] 2052; SSE2-NEXT: por %xmm0, %xmm2 2053; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm0 2054; SSE2-NEXT: pxor %xmm8, %xmm0 2055; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm3 2056; SSE2-NEXT: pxor %xmm8, %xmm3 2057; SSE2-NEXT: movdqa %xmm3, %xmm4 2058; SSE2-NEXT: pcmpgtd %xmm0, %xmm4 2059; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 2060; SSE2-NEXT: pcmpeqd %xmm0, %xmm3 2061; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 2062; SSE2-NEXT: pand %xmm5, %xmm3 2063; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3] 2064; SSE2-NEXT: por %xmm3, %xmm0 2065; SSE2-NEXT: packssdw %xmm2, %xmm0 2066; SSE2-NEXT: packssdw %xmm1, %xmm0 2067; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm1 2068; SSE2-NEXT: pxor %xmm8, %xmm1 2069; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm2 2070; SSE2-NEXT: pxor %xmm8, %xmm2 2071; SSE2-NEXT: movdqa %xmm2, %xmm3 2072; SSE2-NEXT: pcmpgtd %xmm1, %xmm3 2073; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2] 2074; SSE2-NEXT: pcmpeqd %xmm1, %xmm2 2075; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] 2076; SSE2-NEXT:
pand %xmm4, %xmm1 2077; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3] 2078; SSE2-NEXT: por %xmm1, %xmm2 2079; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm1 2080; SSE2-NEXT: pxor %xmm8, %xmm1 2081; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm3 2082; SSE2-NEXT: pxor %xmm8, %xmm3 2083; SSE2-NEXT: movdqa %xmm3, %xmm4 2084; SSE2-NEXT: pcmpgtd %xmm1, %xmm4 2085; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 2086; SSE2-NEXT: pcmpeqd %xmm1, %xmm3 2087; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 2088; SSE2-NEXT: pand %xmm5, %xmm3 2089; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3] 2090; SSE2-NEXT: por %xmm3, %xmm1 2091; SSE2-NEXT: packssdw %xmm2, %xmm1 2092; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm2 2093; SSE2-NEXT: pxor %xmm8, %xmm2 2094; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm3 2095; SSE2-NEXT: pxor %xmm8, %xmm3 2096; SSE2-NEXT: movdqa %xmm3, %xmm4 2097; SSE2-NEXT: pcmpgtd %xmm2, %xmm4 2098; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 2099; SSE2-NEXT: pcmpeqd %xmm2, %xmm3 2100; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3] 2101; SSE2-NEXT: pand %xmm5, %xmm2 2102; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] 2103; SSE2-NEXT: por %xmm2, %xmm3 2104; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm2 2105; SSE2-NEXT: pxor %xmm8, %xmm2 2106; SSE2-NEXT: pxor {{[0-9]+}}(%rsp), %xmm8 2107; SSE2-NEXT: movdqa %xmm8, %xmm4 2108; SSE2-NEXT: pcmpgtd %xmm2, %xmm4 2109; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 2110; SSE2-NEXT: pcmpeqd %xmm2, %xmm8 2111; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm8[1,1,3,3] 2112; SSE2-NEXT: pand %xmm5, %xmm2 2113; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 2114; SSE2-NEXT: por %xmm2, %xmm4 2115; SSE2-NEXT: packssdw %xmm3, %xmm4 2116; SSE2-NEXT: packssdw %xmm1, %xmm4 2117; SSE2-NEXT: packsswb %xmm0, %xmm4 2118; SSE2-NEXT: pmovmskb %xmm4, %edx 2119; SSE2-NEXT: shll $16, %edx 2120; SSE2-NEXT: orl %ecx, %edx 2121; SSE2-NEXT: movl %edx, (%rdi) 2122; SSE2-NEXT: retq 2123; 2124; SSE42-LABEL: test_cmp_v32i64: 2125; SSE42: # %bb.0: 2126; SSE42-NEXT:
movq %rdi, %rax 2127; SSE42-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8 2128; SSE42-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm11 2129; SSE42-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm10 2130; SSE42-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm12 2131; SSE42-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9 2132; SSE42-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm14 2133; SSE42-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm13 2134; SSE42-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm15 2135; SSE42-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm7 2136; SSE42-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm6 2137; SSE42-NEXT: packssdw %xmm7, %xmm6 2138; SSE42-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm5 2139; SSE42-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm4 2140; SSE42-NEXT: packssdw %xmm5, %xmm4 2141; SSE42-NEXT: packssdw %xmm6, %xmm4 2142; SSE42-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm3 2143; SSE42-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm2 2144; SSE42-NEXT: packssdw %xmm3, %xmm2 2145; SSE42-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm1 2146; SSE42-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm0 2147; SSE42-NEXT: packssdw %xmm1, %xmm0 2148; SSE42-NEXT: packssdw %xmm2, %xmm0 2149; SSE42-NEXT: packsswb %xmm4, %xmm0 2150; SSE42-NEXT: pmovmskb %xmm0, %ecx 2151; SSE42-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm15 2152; SSE42-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm13 2153; SSE42-NEXT: packssdw %xmm15, %xmm13 2154; SSE42-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm14 2155; SSE42-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm9 2156; SSE42-NEXT: packssdw %xmm14, %xmm9 2157; SSE42-NEXT: packssdw %xmm13, %xmm9 2158; SSE42-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm12 2159; SSE42-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm10 2160; SSE42-NEXT: packssdw %xmm12, %xmm10 2161; SSE42-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm11 2162; SSE42-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm8 2163; SSE42-NEXT: packssdw %xmm11, %xmm8 2164; SSE42-NEXT: packssdw %xmm10, %xmm8 2165; SSE42-NEXT: packsswb %xmm9, %xmm8 2166; SSE42-NEXT: pmovmskb %xmm8, %edx 2167; SSE42-NEXT: shll $16, %edx 2168; SSE42-NEXT: orl %ecx, %edx 2169; SSE42-NEXT: movl %edx, (%rdi) 2170; SSE42-NEXT: retq 2171;
2172; AVX1-LABEL: test_cmp_v32i64: 2173; AVX1: # %bb.0: 2174; AVX1-NEXT: pushq %rbp 2175; AVX1-NEXT: movq %rsp, %rbp 2176; AVX1-NEXT: andq $-32, %rsp 2177; AVX1-NEXT: subq $32, %rsp 2178; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm8 2179; AVX1-NEXT: vpcmpgtq 256(%rbp), %xmm8, %xmm8 2180; AVX1-NEXT: vpcmpgtq 240(%rbp), %xmm7, %xmm7 2181; AVX1-NEXT: vpackssdw %xmm8, %xmm7, %xmm7 2182; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm8 2183; AVX1-NEXT: vpcmpgtq 224(%rbp), %xmm8, %xmm8 2184; AVX1-NEXT: vpcmpgtq 208(%rbp), %xmm6, %xmm6 2185; AVX1-NEXT: vpackssdw %xmm8, %xmm6, %xmm6 2186; AVX1-NEXT: vpackssdw %xmm7, %xmm6, %xmm6 2187; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm7 2188; AVX1-NEXT: vpcmpgtq 192(%rbp), %xmm7, %xmm7 2189; AVX1-NEXT: vpcmpgtq 176(%rbp), %xmm5, %xmm5 2190; AVX1-NEXT: vpackssdw %xmm7, %xmm5, %xmm5 2191; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm7 2192; AVX1-NEXT: vpcmpgtq 160(%rbp), %xmm7, %xmm7 2193; AVX1-NEXT: vpcmpgtq 144(%rbp), %xmm4, %xmm4 2194; AVX1-NEXT: vpackssdw %xmm7, %xmm4, %xmm4 2195; AVX1-NEXT: vpackssdw %xmm5, %xmm4, %xmm4 2196; AVX1-NEXT: vpacksswb %xmm6, %xmm4, %xmm4 2197; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm5 2198; AVX1-NEXT: vpcmpgtq 128(%rbp), %xmm5, %xmm5 2199; AVX1-NEXT: vpcmpgtq 112(%rbp), %xmm3, %xmm3 2200; AVX1-NEXT: vpackssdw %xmm5, %xmm3, %xmm3 2201; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5 2202; AVX1-NEXT: vpcmpgtq 96(%rbp), %xmm5, %xmm5 2203; AVX1-NEXT: vpcmpgtq 80(%rbp), %xmm2, %xmm2 2204; AVX1-NEXT: vpackssdw %xmm5, %xmm2, %xmm2 2205; AVX1-NEXT: vpackssdw %xmm3, %xmm2, %xmm2 2206; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 2207; AVX1-NEXT: vpcmpgtq 64(%rbp), %xmm3, %xmm3 2208; AVX1-NEXT: vpcmpgtq 48(%rbp), %xmm1, %xmm1 2209; AVX1-NEXT: vpackssdw %xmm3, %xmm1, %xmm1 2210; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 2211; AVX1-NEXT: vpcmpgtq 32(%rbp), %xmm3, %xmm3 2212; AVX1-NEXT: vpcmpgtq 16(%rbp), %xmm0, %xmm0 2213; AVX1-NEXT: vpackssdw %xmm3, %xmm0, %xmm0 2214; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 2215; AVX1-NEXT: vpacksswb %xmm2,
%xmm0, %xmm0 2216; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0 2217; AVX1-NEXT: movq %rbp, %rsp 2218; AVX1-NEXT: popq %rbp 2219; AVX1-NEXT: retq 2220; 2221; AVX2-LABEL: test_cmp_v32i64: 2222; AVX2: # %bb.0: 2223; AVX2-NEXT: pushq %rbp 2224; AVX2-NEXT: movq %rsp, %rbp 2225; AVX2-NEXT: andq $-32, %rsp 2226; AVX2-NEXT: subq $32, %rsp 2227; AVX2-NEXT: vpcmpgtq 240(%rbp), %ymm7, %ymm7 2228; AVX2-NEXT: vpcmpgtq 208(%rbp), %ymm6, %ymm6 2229; AVX2-NEXT: vpackssdw %ymm7, %ymm6, %ymm6 2230; AVX2-NEXT: vpermq {{.*#+}} ymm6 = ymm6[0,2,1,3] 2231; AVX2-NEXT: vpcmpgtq 176(%rbp), %ymm5, %ymm5 2232; AVX2-NEXT: vpcmpgtq 144(%rbp), %ymm4, %ymm4 2233; AVX2-NEXT: vpackssdw %ymm5, %ymm4, %ymm4 2234; AVX2-NEXT: vpermq {{.*#+}} ymm4 = ymm4[0,2,1,3] 2235; AVX2-NEXT: vpackssdw %ymm6, %ymm4, %ymm4 2236; AVX2-NEXT: vpermq {{.*#+}} ymm4 = ymm4[0,2,1,3] 2237; AVX2-NEXT: vpcmpgtq 112(%rbp), %ymm3, %ymm3 2238; AVX2-NEXT: vpcmpgtq 80(%rbp), %ymm2, %ymm2 2239; AVX2-NEXT: vpackssdw %ymm3, %ymm2, %ymm2 2240; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,1,3] 2241; AVX2-NEXT: vpcmpgtq 48(%rbp), %ymm1, %ymm1 2242; AVX2-NEXT: vpcmpgtq 16(%rbp), %ymm0, %ymm0 2243; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 2244; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 2245; AVX2-NEXT: vpackssdw %ymm2, %ymm0, %ymm0 2246; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 2247; AVX2-NEXT: vpacksswb %ymm4, %ymm0, %ymm0 2248; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 2249; AVX2-NEXT: movq %rbp, %rsp 2250; AVX2-NEXT: popq %rbp 2251; AVX2-NEXT: retq 2252; 2253; AVX512F-LABEL: test_cmp_v32i64: 2254; AVX512F: # %bb.0: 2255; AVX512F-NEXT: vpcmpgtq %zmm6, %zmm2, %k0 2256; AVX512F-NEXT: vpcmpgtq %zmm7, %zmm3, %k1 2257; AVX512F-NEXT: kunpckbw %k0, %k1, %k1 2258; AVX512F-NEXT: vpcmpgtq %zmm4, %zmm0, %k0 2259; AVX512F-NEXT: vpcmpgtq %zmm5, %zmm1, %k2 2260; AVX512F-NEXT: kunpckbw %k0, %k2, %k2 2261; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z} 2262; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 2263; AVX512F-NEXT:
vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} 2264; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 2265; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 2266; AVX512F-NEXT: retq 2267; 2268; AVX512DQ-LABEL: test_cmp_v32i64: 2269; AVX512DQ: # %bb.0: 2270; AVX512DQ-NEXT: vpcmpgtq %zmm6, %zmm2, %k0 2271; AVX512DQ-NEXT: vpcmpgtq %zmm7, %zmm3, %k1 2272; AVX512DQ-NEXT: kunpckbw %k0, %k1, %k0 2273; AVX512DQ-NEXT: vpcmpgtq %zmm4, %zmm0, %k1 2274; AVX512DQ-NEXT: vpcmpgtq %zmm5, %zmm1, %k2 2275; AVX512DQ-NEXT: kunpckbw %k1, %k2, %k1 2276; AVX512DQ-NEXT: vpmovm2d %k1, %zmm0 2277; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 2278; AVX512DQ-NEXT: vpmovm2d %k0, %zmm1 2279; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1 2280; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 2281; AVX512DQ-NEXT: retq 2282; 2283; AVX512BW-LABEL: test_cmp_v32i64: 2284; AVX512BW: # %bb.0: 2285; AVX512BW-NEXT: vpcmpgtq %zmm4, %zmm0, %k0 2286; AVX512BW-NEXT: vpcmpgtq %zmm5, %zmm1, %k1 2287; AVX512BW-NEXT: kunpckbw %k0, %k1, %k0 2288; AVX512BW-NEXT: vpcmpgtq %zmm6, %zmm2, %k1 2289; AVX512BW-NEXT: vpcmpgtq %zmm7, %zmm3, %k2 2290; AVX512BW-NEXT: kunpckbw %k1, %k2, %k1 2291; AVX512BW-NEXT: kunpckwd %k0, %k1, %k0 2292; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 2293; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 2294; AVX512BW-NEXT: retq 2295 %1 = icmp sgt <32 x i64> %a0, %a1 2296 ret <32 x i1> %1 2297} 2298