; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW

; Codegen coverage for comparing 512-bit-wide vector operands and bitcasting
; the resulting <N x i1> mask to a scalar integer. The mask is either returned
; (the v* functions) or stored through a pointer (the bitcast_*_store
; functions). Every function is checked under each llc configuration above.

; Signed greater-than compare of two <32 x i16> vectors, with the <32 x i1>
; result bitcast to i32 and returned.
define i32 @v32i16(<32 x i16> %a, <32 x i16> %b) {
; SSE-LABEL: v32i16:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpgtw %xmm5, %xmm1
; SSE-NEXT:    pcmpgtw %xmm4, %xmm0
; SSE-NEXT:    packsswb %xmm1, %xmm0
; SSE-NEXT:    pmovmskb %xmm0, %ecx
; SSE-NEXT:    pcmpgtw %xmm7, %xmm3
; SSE-NEXT:    pcmpgtw %xmm6, %xmm2
; SSE-NEXT:    packsswb %xmm3, %xmm2
; SSE-NEXT:    pmovmskb %xmm2, %eax
; SSE-NEXT:    shll $16, %eax
; SSE-NEXT:    orl %ecx, %eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: v32i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
; AVX1-NEXT:    vpcmpgtw %xmm4, %xmm5, %xmm4
; AVX1-NEXT:    vpcmpgtw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpcmpgtw %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vpcmpgtw %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpacksswb %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    shll $16, %eax
; AVX1-NEXT:    orl %ecx, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: v32i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtw %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpgtw %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    vpmovmskb %ymm0, %eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: v32i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm2
; AVX512F-NEXT:    vpmovsxwd %ymm2, %zmm2
; AVX512F-NEXT:    vptestmd %zmm2, %zmm2, %k0
; AVX512F-NEXT:    kmovw %k0, %ecx
; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    shll $16, %eax
; AVX512F-NEXT:    orl %ecx, %eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtw %zmm1, %zmm0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %x = icmp sgt <32 x i16> %a, %b
  %res = bitcast <32 x i1> %x to i32
  ret i32 %res
}

; Signed greater-than compare of two <16 x i32> vectors, with the <16 x i1>
; result bitcast to i16 and returned.
define i16 @v16i32(<16 x i32> %a, <16 x i32> %b) {
; SSE-LABEL: v16i32:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpgtd %xmm7, %xmm3
; SSE-NEXT:    pcmpgtd %xmm6, %xmm2
; SSE-NEXT:    packssdw %xmm3, %xmm2
; SSE-NEXT:    pcmpgtd %xmm5, %xmm1
; SSE-NEXT:    pcmpgtd %xmm4, %xmm0
; SSE-NEXT:    packssdw %xmm1, %xmm0
; SSE-NEXT:    packsswb %xmm2, %xmm0
; SSE-NEXT:    pmovmskb %xmm0, %eax
; SSE-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: v16i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
; AVX1-NEXT:    vpcmpgtd %xmm4, %xmm5, %xmm4
; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpackssdw %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpackssdw %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: v16i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtd %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpgtd %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX2-NEXT:    vpmovmskb %xmm0, %eax
; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: v16i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v16i32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %x = icmp sgt <16 x i32> %a, %b
  %res = bitcast <16 x i1> %x to i16
  ret i16 %res
}

; Ordered greater-than compare of two <16 x float> vectors, with the
; <16 x i1> result bitcast to i16 and returned.
define i16 @v16f32(<16 x float> %a, <16 x float> %b) {
; SSE-LABEL: v16f32:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpltps %xmm3, %xmm7
; SSE-NEXT:    cmpltps %xmm2, %xmm6
; SSE-NEXT:    packssdw %xmm7, %xmm6
; SSE-NEXT:    cmpltps %xmm1, %xmm5
; SSE-NEXT:    cmpltps %xmm0, %xmm4
; SSE-NEXT:    packssdw %xmm5, %xmm4
; SSE-NEXT:    packsswb %xmm6, %xmm4
; SSE-NEXT:    pmovmskb %xmm4, %eax
; SSE-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: v16f32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcmpltps %ymm1, %ymm3, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpackssdw %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vcmpltps %ymm0, %ymm2, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: v16f32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vcmpltps %ymm1, %ymm3, %ymm1
; AVX2-NEXT:    vcmpltps %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX2-NEXT:    vpmovmskb %xmm0, %eax
; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: v16f32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vcmpltps %zmm0, %zmm1, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v16f32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vcmpltps %zmm0, %zmm1, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %x = fcmp ogt <16 x float> %a, %b
  %res = bitcast <16 x i1> %x to i16
  ret i16 %res
}

; Signed greater-than compare of two <64 x i8> vectors, with the <64 x i1>
; result bitcast to i64 and returned.
define i64 @v64i8(<64 x i8> %a, <64 x i8> %b) {
; SSE-LABEL: v64i8:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpgtb %xmm4, %xmm0
; SSE-NEXT:    pmovmskb %xmm0, %eax
; SSE-NEXT:    pcmpgtb %xmm5, %xmm1
; SSE-NEXT:    pmovmskb %xmm1, %ecx
; SSE-NEXT:    shll $16, %ecx
; SSE-NEXT:    orl %eax, %ecx
; SSE-NEXT:    pcmpgtb %xmm6, %xmm2
; SSE-NEXT:    pmovmskb %xmm2, %edx
; SSE-NEXT:    pcmpgtb %xmm7, %xmm3
; SSE-NEXT:    pmovmskb %xmm3, %eax
; SSE-NEXT:    shll $16, %eax
; SSE-NEXT:    orl %edx, %eax
; SSE-NEXT:    shlq $32, %rax
; SSE-NEXT:    orq %rcx, %rax
; SSE-NEXT:    retq
;
; AVX1-LABEL: v64i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm0, %xmm4
; AVX1-NEXT:    vpmovmskb %xmm4, %eax
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
; AVX1-NEXT:    shll $16, %ecx
; AVX1-NEXT:    orl %eax, %ecx
; AVX1-NEXT:    vpcmpgtb %xmm3, %xmm1, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %edx
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    shll $16, %eax
; AVX1-NEXT:    orl %edx, %eax
; AVX1-NEXT:    shlq $32, %rax
; AVX1-NEXT:    orq %rcx, %rax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: v64i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtb %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpmovmskb %ymm0, %ecx
; AVX2-NEXT:    vpcmpgtb %ymm3, %ymm1, %ymm0
; AVX2-NEXT:    vpmovmskb %ymm0, %eax
; AVX2-NEXT:    shlq $32, %rax
; AVX2-NEXT:    orq %rcx, %rax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: v64i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
; AVX512F-NEXT:    vpcmpgtb %ymm2, %ymm3, %ymm2
; AVX512F-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    vpmovmskb %ymm0, %ecx
; AVX512F-NEXT:    vpmovmskb %ymm2, %eax
; AVX512F-NEXT:    shlq $32, %rax
; AVX512F-NEXT:    orq %rcx, %rax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtb %zmm1, %zmm0, %k0
; AVX512BW-NEXT:    kmovq %k0, %rax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %x = icmp sgt <64 x i8> %a, %b
  %res = bitcast <64 x i1> %x to i64
  ret i64 %res
}

; Signed greater-than compare of two <8 x i64> vectors, with the <8 x i1>
; result bitcast to i8 and returned.
define i8 @v8i64(<8 x i64> %a, <8 x i64> %b) {
; SSE-LABEL: v8i64:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpgtq %xmm7, %xmm3
; SSE-NEXT:    pcmpgtq %xmm6, %xmm2
; SSE-NEXT:    packssdw %xmm3, %xmm2
; SSE-NEXT:    pcmpgtq %xmm5, %xmm1
; SSE-NEXT:    pcmpgtq %xmm4, %xmm0
; SSE-NEXT:    packssdw %xmm1, %xmm0
; SSE-NEXT:    packssdw %xmm2, %xmm0
; SSE-NEXT:    packsswb %xmm0, %xmm0
; SSE-NEXT:    pmovmskb %xmm0, %eax
; SSE-NEXT:    # kill: def $al killed $al killed $eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: v8i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm5, %xmm4
; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpackssdw %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpackssdw %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vmovmskps %ymm0, %eax
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: v8i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtq %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpgtq %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    vmovmskps %ymm0, %eax
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: v8i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v8i64:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %x = icmp sgt <8 x i64> %a, %b
  %res = bitcast <8 x i1> %x to i8
  ret i8 %res
}

; Ordered greater-than compare of two <8 x double> vectors, with the
; <8 x i1> result bitcast to i8 and returned.
define i8 @v8f64(<8 x double> %a, <8 x double> %b) {
; SSE-LABEL: v8f64:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpltpd %xmm3, %xmm7
; SSE-NEXT:    cmpltpd %xmm2, %xmm6
; SSE-NEXT:    packssdw %xmm7, %xmm6
; SSE-NEXT:    cmpltpd %xmm1, %xmm5
; SSE-NEXT:    cmpltpd %xmm0, %xmm4
; SSE-NEXT:    packssdw %xmm5, %xmm4
; SSE-NEXT:    packssdw %xmm6, %xmm4
; SSE-NEXT:    packsswb %xmm4, %xmm4
; SSE-NEXT:    pmovmskb %xmm4, %eax
; SSE-NEXT:    # kill: def $al killed $al killed $eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: v8f64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcmpltpd %ymm1, %ymm3, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpackssdw %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vcmpltpd %ymm0, %ymm2, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vmovmskps %ymm0, %eax
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: v8f64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vcmpltpd %ymm1, %ymm3, %ymm1
; AVX2-NEXT:    vcmpltpd %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    vmovmskps %ymm0, %eax
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: v8f64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vcmpltpd %zmm0, %zmm1, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v8f64:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vcmpltpd %zmm0, %zmm1, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %x = fcmp ogt <8 x double> %a, %b
  %res = bitcast <8 x i1> %x to i8
  ret i8 %res
}

; Signed less-than-zero compare of a <64 x i8> vector, with the <64 x i1>
; result bitcast to i64 and stored through %p.
define void @bitcast_64i8_store(ptr %p, <64 x i8> %a0) {
; SSE-LABEL: bitcast_64i8_store:
; SSE:       # %bb.0:
; SSE-NEXT:    pmovmskb %xmm0, %eax
; SSE-NEXT:    pmovmskb %xmm1, %ecx
; SSE-NEXT:    shll $16, %ecx
; SSE-NEXT:    orl %eax, %ecx
; SSE-NEXT:    pmovmskb %xmm2, %eax
; SSE-NEXT:    pmovmskb %xmm3, %edx
; SSE-NEXT:    shll $16, %edx
; SSE-NEXT:    orl %eax, %edx
; SSE-NEXT:    shlq $32, %rdx
; SSE-NEXT:    orq %rcx, %rdx
; SSE-NEXT:    movq %rdx, (%rdi)
; SSE-NEXT:    retq
;
; AVX1-LABEL: bitcast_64i8_store:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
; AVX1-NEXT:    shll $16, %ecx
; AVX1-NEXT:    orl %eax, %ecx
; AVX1-NEXT:    vpmovmskb %xmm1, %eax
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %edx
; AVX1-NEXT:    shll $16, %edx
; AVX1-NEXT:    orl %eax, %edx
; AVX1-NEXT:    shlq $32, %rdx
; AVX1-NEXT:    orq %rcx, %rdx
; AVX1-NEXT:    movq %rdx, (%rdi)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_64i8_store:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovmskb %ymm1, %eax
; AVX2-NEXT:    shlq $32, %rax
; AVX2-NEXT:    vpmovmskb %ymm0, %ecx
; AVX2-NEXT:    orq %rax, %rcx
; AVX2-NEXT:    movq %rcx, (%rdi)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: bitcast_64i8_store:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm2
; AVX512F-NEXT:    vpmovsxbd %xmm2, %zmm3
; AVX512F-NEXT:    vptestmd %zmm3, %zmm3, %k0
; AVX512F-NEXT:    vextracti128 $1, %ymm2, %xmm2
; AVX512F-NEXT:    vpmovsxbd %xmm2, %zmm2
; AVX512F-NEXT:    vptestmd %zmm2, %zmm2, %k1
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm1
; AVX512F-NEXT:    vptestmd %zmm1, %zmm1, %k2
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k3
; AVX512F-NEXT:    kmovw %k3, 6(%rdi)
; AVX512F-NEXT:    kmovw %k2, 4(%rdi)
; AVX512F-NEXT:    kmovw %k1, 2(%rdi)
; AVX512F-NEXT:    kmovw %k0, (%rdi)
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: bitcast_64i8_store:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpmovb2m %zmm0, %k0
; AVX512BW-NEXT:    kmovq %k0, (%rdi)
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %a1 = icmp slt <64 x i8> %a0, zeroinitializer
  %a2 = bitcast <64 x i1> %a1 to i64
  store i64 %a2, ptr %p
  ret void
}

; Signed less-than-zero compare of a <32 x i16> vector, with the <32 x i1>
; result bitcast to i32 and stored through %p.
define void @bitcast_32i16_store(ptr %p, <32 x i16> %a0) {
; SSE-LABEL: bitcast_32i16_store:
; SSE:       # %bb.0:
; SSE-NEXT:    packsswb %xmm1, %xmm0
; SSE-NEXT:    pmovmskb %xmm0, %eax
; SSE-NEXT:    packsswb %xmm3, %xmm2
; SSE-NEXT:    pmovmskb %xmm2, %ecx
; SSE-NEXT:    shll $16, %ecx
; SSE-NEXT:    orl %eax, %ecx
; SSE-NEXT:    movl %ecx, (%rdi)
; SSE-NEXT:    retq
;
; AVX1-LABEL: bitcast_32i16_store:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT:    vpacksswb %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
; AVX1-NEXT:    shll $16, %ecx
; AVX1-NEXT:    orl %eax, %ecx
; AVX1-NEXT:    movl %ecx, (%rdi)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_32i16_store:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    vpmovmskb %ymm0, %eax
; AVX2-NEXT:    movl %eax, (%rdi)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: bitcast_32i16_store:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm2
; AVX512F-NEXT:    vpmovsxwd %ymm2, %zmm2
; AVX512F-NEXT:    vptestmd %zmm2, %zmm2, %k0
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT:    vpcmpgtw %ymm0, %ymm1, %ymm0
; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT:    kmovw %k1, 2(%rdi)
; AVX512F-NEXT:    kmovw %k0, (%rdi)
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: bitcast_32i16_store:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpmovw2m %zmm0, %k0
; AVX512BW-NEXT:    kmovd %k0, (%rdi)
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %a1 = icmp slt <32 x i16> %a0, zeroinitializer
  %a2 = bitcast <32 x i1> %a1 to i32
  store i32 %a2, ptr %p
  ret void
}

; Signed less-than-zero compare of a <16 x i32> vector, with the <16 x i1>
; result bitcast to i16 and stored through %p. Both AVX512 configurations
; are covered here by the shared AVX512 check prefix.
define void @bitcast_16i32_store(ptr %p, <16 x i32> %a0) {
; SSE-LABEL: bitcast_16i32_store:
; SSE:       # %bb.0:
; SSE-NEXT:    packssdw %xmm3, %xmm2
; SSE-NEXT:    packssdw %xmm1, %xmm0
; SSE-NEXT:    packsswb %xmm2, %xmm0
; SSE-NEXT:    pmovmskb %xmm0, %eax
; SSE-NEXT:    movw %ax, (%rdi)
; SSE-NEXT:    retq
;
; AVX1-LABEL: bitcast_16i32_store:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    movw %ax, (%rdi)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_16i32_store:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpcmpgtd %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX2-NEXT:    vpmovmskb %xmm0, %eax
; AVX2-NEXT:    movw %ax, (%rdi)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: bitcast_16i32_store:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpcmpgtd %zmm0, %zmm1, %k0
; AVX512-NEXT:    kmovw %k0, (%rdi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %a1 = icmp slt <16 x i32> %a0, zeroinitializer
  %a2 = bitcast <16 x i1> %a1 to i16
  store i16 %a2, ptr %p
  ret void
}

; Signed less-than-zero compare of an <8 x i64> vector, with the <8 x i1>
; result bitcast to i8 and stored through %p.
define void @bitcast_8i64_store(ptr %p, <8 x i64> %a0) {
; SSE-LABEL: bitcast_8i64_store:
; SSE:       # %bb.0:
; SSE-NEXT:    pxor %xmm4, %xmm4
; SSE-NEXT:    pxor %xmm5, %xmm5
; SSE-NEXT:    pcmpgtq %xmm3, %xmm5
; SSE-NEXT:    pxor %xmm3, %xmm3
; SSE-NEXT:    pcmpgtq %xmm2, %xmm3
; SSE-NEXT:    packssdw %xmm5, %xmm3
; SSE-NEXT:    pxor %xmm2, %xmm2
; SSE-NEXT:    pcmpgtq %xmm1, %xmm2
; SSE-NEXT:    pcmpgtq %xmm0, %xmm4
; SSE-NEXT:    packssdw %xmm2, %xmm4
; SSE-NEXT:    packssdw %xmm3, %xmm4
; SSE-NEXT:    packsswb %xmm4, %xmm4
; SSE-NEXT:    pmovmskb %xmm4, %eax
; SSE-NEXT:    movb %al, (%rdi)
; SSE-NEXT:    retq
;
; AVX1-LABEL: bitcast_8i64_store:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vmovmskps %ymm0, %eax
; AVX1-NEXT:    movb %al, (%rdi)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_8i64_store:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    vmovmskps %ymm0, %eax
; AVX2-NEXT:    movb %al, (%rdi)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: bitcast_8i64_store:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT:    vpcmpgtq %zmm0, %zmm1, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movb %al, (%rdi)
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: bitcast_8i64_store:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT:    vpcmpgtq %zmm0, %zmm1, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    movb %al, (%rdi)
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %a1 = icmp slt <8 x i64> %a0, zeroinitializer
  %a2 = bitcast <8 x i1> %a1 to i8
  store i8 %a2, ptr %p
  ret void
}