1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2-SSSE3 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSE2-SSSE3 4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41 5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX12,AVX1 6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX12,AVX2 7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512 8 9; 10; 128-bit vectors 11; 12 13define i1 @bitcast_v2i64_to_v2i1(<2 x i64> %a0) nounwind { 14; SSE-LABEL: bitcast_v2i64_to_v2i1: 15; SSE: # %bb.0: 16; SSE-NEXT: movmskpd %xmm0, %ecx 17; SSE-NEXT: movl %ecx, %eax 18; SSE-NEXT: shrb %al 19; SSE-NEXT: addb %cl, %al 20; SSE-NEXT: retq 21; 22; AVX12-LABEL: bitcast_v2i64_to_v2i1: 23; AVX12: # %bb.0: 24; AVX12-NEXT: vmovmskpd %xmm0, %ecx 25; AVX12-NEXT: movl %ecx, %eax 26; AVX12-NEXT: shrb %al 27; AVX12-NEXT: addb %cl, %al 28; AVX12-NEXT: retq 29; 30; AVX512-LABEL: bitcast_v2i64_to_v2i1: 31; AVX512: # %bb.0: 32; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1 33; AVX512-NEXT: vpcmpgtq %xmm0, %xmm1, %k0 34; AVX512-NEXT: kshiftrw $1, %k0, %k1 35; AVX512-NEXT: kmovd %k1, %ecx 36; AVX512-NEXT: kmovd %k0, %eax 37; AVX512-NEXT: addb %cl, %al 38; AVX512-NEXT: # kill: def $al killed $al killed $eax 39; AVX512-NEXT: retq 40 %1 = icmp slt <2 x i64> %a0, zeroinitializer 41 %2 = bitcast <2 x i1> %1 to <2 x i1> 42 %3 = extractelement <2 x i1> %2, i32 0 43 %4 = extractelement <2 x i1> %2, i32 1 44 %5 = add i1 %3, %4 45 ret i1 %5 46} 47 48define i1 @trunc_v2i64_cmp(<2 x i64> %a0) nounwind { 49; SSE2-SSSE3-LABEL: trunc_v2i64_cmp: 50; SSE2-SSSE3: # %bb.0: 51; SSE2-SSSE3-NEXT: psllq $63, %xmm0 52; 
SSE2-SSSE3-NEXT: movmskpd %xmm0, %eax 53; SSE2-SSSE3-NEXT: testl %eax, %eax 54; SSE2-SSSE3-NEXT: sete %al 55; SSE2-SSSE3-NEXT: retq 56; 57; SSE41-LABEL: trunc_v2i64_cmp: 58; SSE41: # %bb.0: 59; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 60; SSE41-NEXT: sete %al 61; SSE41-NEXT: retq 62; 63; AVX12-LABEL: trunc_v2i64_cmp: 64; AVX12: # %bb.0: 65; AVX12-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 66; AVX12-NEXT: sete %al 67; AVX12-NEXT: retq 68; 69; AVX512-LABEL: trunc_v2i64_cmp: 70; AVX512: # %bb.0: 71; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm1 = [1,1] 72; AVX512-NEXT: vptest %xmm1, %xmm0 73; AVX512-NEXT: sete %al 74; AVX512-NEXT: retq 75 %1 = trunc <2 x i64> %a0 to <2 x i1> 76 %2 = bitcast <2 x i1> %1 to i2 77 %3 = icmp eq i2 %2, 0 78 ret i1 %3 79} 80 81define i2 @bitcast_v4i32_to_v2i2(<4 x i32> %a0) nounwind { 82; SSE-LABEL: bitcast_v4i32_to_v2i2: 83; SSE: # %bb.0: 84; SSE-NEXT: movmskps %xmm0, %eax 85; SSE-NEXT: movl %eax, %ecx 86; SSE-NEXT: shrb $2, %cl 87; SSE-NEXT: andb $3, %al 88; SSE-NEXT: addb %cl, %al 89; SSE-NEXT: # kill: def $al killed $al killed $eax 90; SSE-NEXT: retq 91; 92; AVX-LABEL: bitcast_v4i32_to_v2i2: 93; AVX: # %bb.0: 94; AVX-NEXT: vmovmskps %xmm0, %eax 95; AVX-NEXT: movl %eax, %ecx 96; AVX-NEXT: shrb $2, %cl 97; AVX-NEXT: andb $3, %al 98; AVX-NEXT: addb %cl, %al 99; AVX-NEXT: # kill: def $al killed $al killed $eax 100; AVX-NEXT: retq 101 %1 = icmp slt <4 x i32> %a0, zeroinitializer 102 %2 = bitcast <4 x i1> %1 to <2 x i2> 103 %3 = extractelement <2 x i2> %2, i32 0 104 %4 = extractelement <2 x i2> %2, i32 1 105 %5 = add i2 %3, %4 106 ret i2 %5 107} 108 109define i1 @trunc_v4i32_cmp(<4 x i32> %a0) nounwind { 110; SSE2-SSSE3-LABEL: trunc_v4i32_cmp: 111; SSE2-SSSE3: # %bb.0: 112; SSE2-SSSE3-NEXT: pslld $31, %xmm0 113; SSE2-SSSE3-NEXT: movmskps %xmm0, %eax 114; SSE2-SSSE3-NEXT: xorl $15, %eax 115; SSE2-SSSE3-NEXT: sete %al 116; SSE2-SSSE3-NEXT: retq 117; 118; SSE41-LABEL: trunc_v4i32_cmp: 119; SSE41: # %bb.0: 120; SSE41-NEXT: ptest 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 121; SSE41-NEXT: setb %al 122; SSE41-NEXT: retq 123; 124; AVX12-LABEL: trunc_v4i32_cmp: 125; AVX12: # %bb.0: 126; AVX12-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 127; AVX12-NEXT: setb %al 128; AVX12-NEXT: retq 129; 130; AVX512-LABEL: trunc_v4i32_cmp: 131; AVX512: # %bb.0: 132; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4294967297,4294967297] 133; AVX512-NEXT: vptest %xmm1, %xmm0 134; AVX512-NEXT: setb %al 135; AVX512-NEXT: retq 136 %1 = trunc <4 x i32> %a0 to <4 x i1> 137 %2 = bitcast <4 x i1> %1 to i4 138 %3 = icmp eq i4 %2, -1 139 ret i1 %3 140} 141 142define i4 @bitcast_v8i16_to_v2i4(<8 x i16> %a0) nounwind { 143; SSE-LABEL: bitcast_v8i16_to_v2i4: 144; SSE: # %bb.0: 145; SSE-NEXT: packsswb %xmm0, %xmm0 146; SSE-NEXT: pmovmskb %xmm0, %eax 147; SSE-NEXT: movl %eax, %ecx 148; SSE-NEXT: shrb $4, %cl 149; SSE-NEXT: andb $15, %al 150; SSE-NEXT: addb %cl, %al 151; SSE-NEXT: # kill: def $al killed $al killed $eax 152; SSE-NEXT: retq 153; 154; AVX12-LABEL: bitcast_v8i16_to_v2i4: 155; AVX12: # %bb.0: 156; AVX12-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 157; AVX12-NEXT: vpmovmskb %xmm0, %eax 158; AVX12-NEXT: movl %eax, %ecx 159; AVX12-NEXT: shrb $4, %cl 160; AVX12-NEXT: andb $15, %al 161; AVX12-NEXT: addb %cl, %al 162; AVX12-NEXT: # kill: def $al killed $al killed $eax 163; AVX12-NEXT: retq 164; 165; AVX512-LABEL: bitcast_v8i16_to_v2i4: 166; AVX512: # %bb.0: 167; AVX512-NEXT: vpmovw2m %xmm0, %k0 168; AVX512-NEXT: kmovd %k0, %eax 169; AVX512-NEXT: movl %eax, %ecx 170; AVX512-NEXT: shrb $4, %cl 171; AVX512-NEXT: andb $15, %al 172; AVX512-NEXT: addb %cl, %al 173; AVX512-NEXT: # kill: def $al killed $al killed $eax 174; AVX512-NEXT: retq 175 %1 = icmp slt <8 x i16> %a0, zeroinitializer 176 %2 = bitcast <8 x i1> %1 to <2 x i4> 177 %3 = extractelement <2 x i4> %2, i32 0 178 %4 = extractelement <2 x i4> %2, i32 1 179 %5 = add i4 %3, %4 180 ret i4 %5 181} 182 183define i1 @trunc_v8i16_cmp(<8 x i16> %a0) nounwind { 184; SSE2-SSSE3-LABEL: 
trunc_v8i16_cmp: 185; SSE2-SSSE3: # %bb.0: 186; SSE2-SSSE3-NEXT: psllw $7, %xmm0 187; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax 188; SSE2-SSSE3-NEXT: testl $21845, %eax # imm = 0x5555 189; SSE2-SSSE3-NEXT: setne %al 190; SSE2-SSSE3-NEXT: retq 191; 192; SSE41-LABEL: trunc_v8i16_cmp: 193; SSE41: # %bb.0: 194; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 195; SSE41-NEXT: setne %al 196; SSE41-NEXT: retq 197; 198; AVX12-LABEL: trunc_v8i16_cmp: 199; AVX12: # %bb.0: 200; AVX12-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 201; AVX12-NEXT: setne %al 202; AVX12-NEXT: retq 203; 204; AVX512-LABEL: trunc_v8i16_cmp: 205; AVX512: # %bb.0: 206; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm1 = [281479271743489,281479271743489] 207; AVX512-NEXT: vptest %xmm1, %xmm0 208; AVX512-NEXT: setne %al 209; AVX512-NEXT: retq 210 %1 = trunc <8 x i16> %a0 to <8 x i1> 211 %2 = bitcast <8 x i1> %1 to i8 212 %3 = icmp ne i8 %2, 0 213 ret i1 %3 214} 215 216define i8 @bitcast_v16i8_to_v2i8(<16 x i8> %a0) nounwind { 217; SSE-LABEL: bitcast_v16i8_to_v2i8: 218; SSE: # %bb.0: 219; SSE-NEXT: pmovmskb %xmm0, %ecx 220; SSE-NEXT: movl %ecx, %eax 221; SSE-NEXT: shrl $8, %eax 222; SSE-NEXT: addb %cl, %al 223; SSE-NEXT: # kill: def $al killed $al killed $eax 224; SSE-NEXT: retq 225; 226; AVX12-LABEL: bitcast_v16i8_to_v2i8: 227; AVX12: # %bb.0: 228; AVX12-NEXT: vpmovmskb %xmm0, %ecx 229; AVX12-NEXT: movl %ecx, %eax 230; AVX12-NEXT: shrl $8, %eax 231; AVX12-NEXT: addb %cl, %al 232; AVX12-NEXT: # kill: def $al killed $al killed $eax 233; AVX12-NEXT: retq 234; 235; AVX512-LABEL: bitcast_v16i8_to_v2i8: 236; AVX512: # %bb.0: 237; AVX512-NEXT: vpmovb2m %xmm0, %k0 238; AVX512-NEXT: kshiftrw $8, %k0, %k1 239; AVX512-NEXT: kmovd %k0, %ecx 240; AVX512-NEXT: kmovd %k1, %eax 241; AVX512-NEXT: addb %cl, %al 242; AVX512-NEXT: # kill: def $al killed $al killed $eax 243; AVX512-NEXT: retq 244 %1 = icmp slt <16 x i8> %a0, zeroinitializer 245 %2 = bitcast <16 x i1> %1 to <2 x i8> 246 %3 = extractelement <2 x i8> %2, i32 0 
247 %4 = extractelement <2 x i8> %2, i32 1 248 %5 = add i8 %3, %4 249 ret i8 %5 250} 251 252define i1 @trunc_v16i8_cmp(<16 x i8> %a0) nounwind { 253; SSE2-SSSE3-LABEL: trunc_v16i8_cmp: 254; SSE2-SSSE3: # %bb.0: 255; SSE2-SSSE3-NEXT: psllw $7, %xmm0 256; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax 257; SSE2-SSSE3-NEXT: xorl $65535, %eax # imm = 0xFFFF 258; SSE2-SSSE3-NEXT: setne %al 259; SSE2-SSSE3-NEXT: retq 260; 261; SSE41-LABEL: trunc_v16i8_cmp: 262; SSE41: # %bb.0: 263; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 264; SSE41-NEXT: setae %al 265; SSE41-NEXT: retq 266; 267; AVX12-LABEL: trunc_v16i8_cmp: 268; AVX12: # %bb.0: 269; AVX12-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 270; AVX12-NEXT: setae %al 271; AVX12-NEXT: retq 272; 273; AVX512-LABEL: trunc_v16i8_cmp: 274; AVX512: # %bb.0: 275; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm1 = [72340172838076673,72340172838076673] 276; AVX512-NEXT: vptest %xmm1, %xmm0 277; AVX512-NEXT: setae %al 278; AVX512-NEXT: retq 279 %1 = trunc <16 x i8> %a0 to <16 x i1> 280 %2 = bitcast <16 x i1> %1 to i16 281 %3 = icmp ne i16 %2, -1 282 ret i1 %3 283} 284 285; 286; 256-bit vectors 287; 288 289define i2 @bitcast_v4i64_to_v2i2(<4 x i64> %a0) nounwind { 290; SSE-LABEL: bitcast_v4i64_to_v2i2: 291; SSE: # %bb.0: 292; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3] 293; SSE-NEXT: movmskps %xmm0, %eax 294; SSE-NEXT: movl %eax, %ecx 295; SSE-NEXT: shrb $2, %cl 296; SSE-NEXT: andb $3, %al 297; SSE-NEXT: addb %cl, %al 298; SSE-NEXT: # kill: def $al killed $al killed $eax 299; SSE-NEXT: retq 300; 301; AVX-LABEL: bitcast_v4i64_to_v2i2: 302; AVX: # %bb.0: 303; AVX-NEXT: vmovmskpd %ymm0, %eax 304; AVX-NEXT: movl %eax, %ecx 305; AVX-NEXT: shrb $2, %cl 306; AVX-NEXT: andb $3, %al 307; AVX-NEXT: addb %cl, %al 308; AVX-NEXT: # kill: def $al killed $al killed $eax 309; AVX-NEXT: vzeroupper 310; AVX-NEXT: retq 311 %1 = icmp slt <4 x i64> %a0, zeroinitializer 312 %2 = bitcast <4 x i1> %1 to <2 x i2> 313 %3 = extractelement <2 x i2> %2, 
i32 0 314 %4 = extractelement <2 x i2> %2, i32 1 315 %5 = add i2 %3, %4 316 ret i2 %5 317} 318 319define i1 @trunc_v4i64_cmp(<4 x i64> %a0) nounwind { 320; SSE2-SSSE3-LABEL: trunc_v4i64_cmp: 321; SSE2-SSSE3: # %bb.0: 322; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] 323; SSE2-SSSE3-NEXT: pslld $31, %xmm0 324; SSE2-SSSE3-NEXT: movmskps %xmm0, %eax 325; SSE2-SSSE3-NEXT: testl %eax, %eax 326; SSE2-SSSE3-NEXT: setne %al 327; SSE2-SSSE3-NEXT: retq 328; 329; SSE41-LABEL: trunc_v4i64_cmp: 330; SSE41: # %bb.0: 331; SSE41-NEXT: por %xmm1, %xmm0 332; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 333; SSE41-NEXT: setne %al 334; SSE41-NEXT: retq 335; 336; AVX1-LABEL: trunc_v4i64_cmp: 337; AVX1: # %bb.0: 338; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0 339; AVX1-NEXT: setne %al 340; AVX1-NEXT: vzeroupper 341; AVX1-NEXT: retq 342; 343; AVX2-LABEL: trunc_v4i64_cmp: 344; AVX2: # %bb.0: 345; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1] 346; AVX2-NEXT: vptest %ymm1, %ymm0 347; AVX2-NEXT: setne %al 348; AVX2-NEXT: vzeroupper 349; AVX2-NEXT: retq 350; 351; AVX512-LABEL: trunc_v4i64_cmp: 352; AVX512: # %bb.0: 353; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1] 354; AVX512-NEXT: vptest %ymm1, %ymm0 355; AVX512-NEXT: setne %al 356; AVX512-NEXT: vzeroupper 357; AVX512-NEXT: retq 358 %1 = trunc <4 x i64> %a0 to <4 x i1> 359 %2 = bitcast <4 x i1> %1 to i4 360 %3 = icmp ne i4 %2, 0 361 ret i1 %3 362} 363 364define i4 @bitcast_v8i32_to_v2i4(<8 x i32> %a0) nounwind { 365; SSE-LABEL: bitcast_v8i32_to_v2i4: 366; SSE: # %bb.0: 367; SSE-NEXT: packssdw %xmm1, %xmm0 368; SSE-NEXT: packsswb %xmm0, %xmm0 369; SSE-NEXT: pmovmskb %xmm0, %eax 370; SSE-NEXT: movl %eax, %ecx 371; SSE-NEXT: shrb $4, %cl 372; SSE-NEXT: andb $15, %al 373; SSE-NEXT: addb %cl, %al 374; SSE-NEXT: # kill: def $al killed $al killed $eax 375; SSE-NEXT: retq 376; 377; AVX-LABEL: bitcast_v8i32_to_v2i4: 378; AVX: # %bb.0: 379; AVX-NEXT: vmovmskps %ymm0, %eax 380; AVX-NEXT: movl %eax, %ecx 
381; AVX-NEXT: shrb $4, %cl 382; AVX-NEXT: andb $15, %al 383; AVX-NEXT: addb %cl, %al 384; AVX-NEXT: # kill: def $al killed $al killed $eax 385; AVX-NEXT: vzeroupper 386; AVX-NEXT: retq 387 %1 = icmp slt <8 x i32> %a0, zeroinitializer 388 %2 = bitcast <8 x i1> %1 to <2 x i4> 389 %3 = extractelement <2 x i4> %2, i32 0 390 %4 = extractelement <2 x i4> %2, i32 1 391 %5 = add i4 %3, %4 392 ret i4 %5 393} 394 395define i1 @trunc_v8i32_cmp(<8 x i32> %a0) nounwind { 396; SSE2-SSSE3-LABEL: trunc_v8i32_cmp: 397; SSE2-SSSE3: # %bb.0: 398; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0 399; SSE2-SSSE3-NEXT: pslld $31, %xmm0 400; SSE2-SSSE3-NEXT: movmskps %xmm0, %eax 401; SSE2-SSSE3-NEXT: xorl $15, %eax 402; SSE2-SSSE3-NEXT: setne %al 403; SSE2-SSSE3-NEXT: retq 404; 405; SSE41-LABEL: trunc_v8i32_cmp: 406; SSE41: # %bb.0: 407; SSE41-NEXT: pand %xmm1, %xmm0 408; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 409; SSE41-NEXT: setae %al 410; SSE41-NEXT: retq 411; 412; AVX1-LABEL: trunc_v8i32_cmp: 413; AVX1: # %bb.0: 414; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0 415; AVX1-NEXT: setae %al 416; AVX1-NEXT: vzeroupper 417; AVX1-NEXT: retq 418; 419; AVX2-LABEL: trunc_v8i32_cmp: 420; AVX2: # %bb.0: 421; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4294967297,4294967297,4294967297,4294967297] 422; AVX2-NEXT: vptest %ymm1, %ymm0 423; AVX2-NEXT: setae %al 424; AVX2-NEXT: vzeroupper 425; AVX2-NEXT: retq 426; 427; AVX512-LABEL: trunc_v8i32_cmp: 428; AVX512: # %bb.0: 429; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4294967297,4294967297,4294967297,4294967297] 430; AVX512-NEXT: vptest %ymm1, %ymm0 431; AVX512-NEXT: setae %al 432; AVX512-NEXT: vzeroupper 433; AVX512-NEXT: retq 434 %1 = trunc <8 x i32> %a0 to <8 x i1> 435 %2 = bitcast <8 x i1> %1 to i8 436 %3 = icmp ne i8 %2, -1 437 ret i1 %3 438} 439 440define i8 @bitcast_v16i16_to_v2i8(<16 x i16> %a0) nounwind { 441; SSE-LABEL: bitcast_v16i16_to_v2i8: 442; SSE: # %bb.0: 443; SSE-NEXT: packsswb %xmm1, %xmm0 444; SSE-NEXT: pmovmskb
%xmm0, %ecx 445; SSE-NEXT: movl %ecx, %eax 446; SSE-NEXT: shrl $8, %eax 447; SSE-NEXT: addb %cl, %al 448; SSE-NEXT: # kill: def $al killed $al killed $eax 449; SSE-NEXT: retq 450; 451; AVX1-LABEL: bitcast_v16i16_to_v2i8: 452; AVX1: # %bb.0: 453; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 454; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 455; AVX1-NEXT: vpmovmskb %xmm0, %ecx 456; AVX1-NEXT: movl %ecx, %eax 457; AVX1-NEXT: shrl $8, %eax 458; AVX1-NEXT: addb %cl, %al 459; AVX1-NEXT: # kill: def $al killed $al killed $eax 460; AVX1-NEXT: vzeroupper 461; AVX1-NEXT: retq 462; 463; AVX2-LABEL: bitcast_v16i16_to_v2i8: 464; AVX2: # %bb.0: 465; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 466; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 467; AVX2-NEXT: vpmovmskb %xmm0, %ecx 468; AVX2-NEXT: movl %ecx, %eax 469; AVX2-NEXT: shrl $8, %eax 470; AVX2-NEXT: addb %cl, %al 471; AVX2-NEXT: # kill: def $al killed $al killed $eax 472; AVX2-NEXT: vzeroupper 473; AVX2-NEXT: retq 474; 475; AVX512-LABEL: bitcast_v16i16_to_v2i8: 476; AVX512: # %bb.0: 477; AVX512-NEXT: vpmovw2m %ymm0, %k0 478; AVX512-NEXT: kshiftrw $8, %k0, %k1 479; AVX512-NEXT: kmovd %k0, %ecx 480; AVX512-NEXT: kmovd %k1, %eax 481; AVX512-NEXT: addb %cl, %al 482; AVX512-NEXT: # kill: def $al killed $al killed $eax 483; AVX512-NEXT: vzeroupper 484; AVX512-NEXT: retq 485 %1 = icmp slt <16 x i16> %a0, zeroinitializer 486 %2 = bitcast <16 x i1> %1 to <2 x i8> 487 %3 = extractelement <2 x i8> %2, i32 0 488 %4 = extractelement <2 x i8> %2, i32 1 489 %5 = add i8 %3, %4 490 ret i8 %5 491} 492 493define i1 @trunc_v16i16_cmp(<16 x i16> %a0) nounwind { 494; SSE2-SSSE3-LABEL: trunc_v16i16_cmp: 495; SSE2-SSSE3: # %bb.0: 496; SSE2-SSSE3-NEXT: por %xmm1, %xmm0 497; SSE2-SSSE3-NEXT: psllw $7, %xmm0 498; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax 499; SSE2-SSSE3-NEXT: testl $21845, %eax # imm = 0x5555 500; SSE2-SSSE3-NEXT: sete %al 501; SSE2-SSSE3-NEXT: retq 502; 503; SSE41-LABEL: trunc_v16i16_cmp: 504; SSE41: # %bb.0: 505; SSE41-NEXT: por %xmm1, %xmm0 506; 
SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 507; SSE41-NEXT: sete %al 508; SSE41-NEXT: retq 509; 510; AVX1-LABEL: trunc_v16i16_cmp: 511; AVX1: # %bb.0: 512; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0 513; AVX1-NEXT: sete %al 514; AVX1-NEXT: vzeroupper 515; AVX1-NEXT: retq 516; 517; AVX2-LABEL: trunc_v16i16_cmp: 518; AVX2: # %bb.0: 519; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [281479271743489,281479271743489,281479271743489,281479271743489] 520; AVX2-NEXT: vptest %ymm1, %ymm0 521; AVX2-NEXT: sete %al 522; AVX2-NEXT: vzeroupper 523; AVX2-NEXT: retq 524; 525; AVX512-LABEL: trunc_v16i16_cmp: 526; AVX512: # %bb.0: 527; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [281479271743489,281479271743489,281479271743489,281479271743489] 528; AVX512-NEXT: vptest %ymm1, %ymm0 529; AVX512-NEXT: sete %al 530; AVX512-NEXT: vzeroupper 531; AVX512-NEXT: retq 532 %1 = trunc <16 x i16> %a0 to <16 x i1> 533 %2 = bitcast <16 x i1> %1 to i16 534 %3 = icmp eq i16 %2, 0 535 ret i1 %3 536} 537 538define i16 @bitcast_v32i8_to_v2i16(<32 x i8> %a0) nounwind { 539; SSE-LABEL: bitcast_v32i8_to_v2i16: 540; SSE: # %bb.0: 541; SSE-NEXT: pmovmskb %xmm1, %ecx 542; SSE-NEXT: pmovmskb %xmm0, %eax 543; SSE-NEXT: addl %ecx, %eax 544; SSE-NEXT: # kill: def $ax killed $ax killed $eax 545; SSE-NEXT: retq 546; 547; AVX1-LABEL: bitcast_v32i8_to_v2i16: 548; AVX1: # %bb.0: 549; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 550; AVX1-NEXT: vpmovmskb %xmm1, %ecx 551; AVX1-NEXT: vpmovmskb %xmm0, %eax 552; AVX1-NEXT: addl %ecx, %eax 553; AVX1-NEXT: # kill: def $ax killed $ax killed $eax 554; AVX1-NEXT: vzeroupper 555; AVX1-NEXT: retq 556; 557; AVX2-LABEL: bitcast_v32i8_to_v2i16: 558; AVX2: # %bb.0: 559; AVX2-NEXT: vpmovmskb %ymm0, %ecx 560; AVX2-NEXT: movl %ecx, %eax 561; AVX2-NEXT: shrl $16, %eax 562; AVX2-NEXT: addl %ecx, %eax 563; AVX2-NEXT: # kill: def $ax killed $ax killed $eax 564; AVX2-NEXT: vzeroupper 565; AVX2-NEXT: retq 566; 567; AVX512-LABEL: bitcast_v32i8_to_v2i16: 568; AVX512: # %bb.0: 
569; AVX512-NEXT: vpmovb2m %ymm0, %k0 570; AVX512-NEXT: kshiftrd $16, %k0, %k1 571; AVX512-NEXT: kmovd %k0, %ecx 572; AVX512-NEXT: kmovd %k1, %eax 573; AVX512-NEXT: addl %ecx, %eax 574; AVX512-NEXT: # kill: def $ax killed $ax killed $eax 575; AVX512-NEXT: vzeroupper 576; AVX512-NEXT: retq 577 %1 = icmp slt <32 x i8> %a0, zeroinitializer 578 %2 = bitcast <32 x i1> %1 to <2 x i16> 579 %3 = extractelement <2 x i16> %2, i32 0 580 %4 = extractelement <2 x i16> %2, i32 1 581 %5 = add i16 %3, %4 582 ret i16 %5 583} 584 585define i1 @trunc_v32i8_cmp(<32 x i8> %a0) nounwind { 586; SSE2-SSSE3-LABEL: trunc_v32i8_cmp: 587; SSE2-SSSE3: # %bb.0: 588; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0 589; SSE2-SSSE3-NEXT: psllw $7, %xmm0 590; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax 591; SSE2-SSSE3-NEXT: xorl $65535, %eax # imm = 0xFFFF 592; SSE2-SSSE3-NEXT: sete %al 593; SSE2-SSSE3-NEXT: retq 594; 595; SSE41-LABEL: trunc_v32i8_cmp: 596; SSE41: # %bb.0: 597; SSE41-NEXT: pand %xmm1, %xmm0 598; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 599; SSE41-NEXT: setb %al 600; SSE41-NEXT: retq 601; 602; AVX1-LABEL: trunc_v32i8_cmp: 603; AVX1: # %bb.0: 604; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0 605; AVX1-NEXT: setb %al 606; AVX1-NEXT: vzeroupper 607; AVX1-NEXT: retq 608; 609; AVX2-LABEL: trunc_v32i8_cmp: 610; AVX2: # %bb.0: 611; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [72340172838076673,72340172838076673,72340172838076673,72340172838076673] 612; AVX2-NEXT: vptest %ymm1, %ymm0 613; AVX2-NEXT: setb %al 614; AVX2-NEXT: vzeroupper 615; AVX2-NEXT: retq 616; 617; AVX512-LABEL: trunc_v32i8_cmp: 618; AVX512: # %bb.0: 619; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [72340172838076673,72340172838076673,72340172838076673,72340172838076673] 620; AVX512-NEXT: vptest %ymm1, %ymm0 621; AVX512-NEXT: setb %al 622; AVX512-NEXT: vzeroupper 623; AVX512-NEXT: retq 624 %1 = trunc <32 x i8> %a0 to <32 x i1> 625 %2 = bitcast <32 x i1> %1 to i32 626 %3 = icmp eq i32 %2, -1 627 ret i1 %3 628} 629 630; 631; 
512-bit vectors 632; 633 634define i4 @bitcast_v8i64_to_v2i4(<8 x i64> %a0) nounwind { 635; SSE-LABEL: bitcast_v8i64_to_v2i4: 636; SSE: # %bb.0: 637; SSE-NEXT: packssdw %xmm3, %xmm2 638; SSE-NEXT: packssdw %xmm1, %xmm0 639; SSE-NEXT: packssdw %xmm2, %xmm0 640; SSE-NEXT: packsswb %xmm0, %xmm0 641; SSE-NEXT: pmovmskb %xmm0, %eax 642; SSE-NEXT: movl %eax, %ecx 643; SSE-NEXT: shrb $4, %cl 644; SSE-NEXT: andb $15, %al 645; SSE-NEXT: addb %cl, %al 646; SSE-NEXT: # kill: def $al killed $al killed $eax 647; SSE-NEXT: retq 648; 649; AVX1-LABEL: bitcast_v8i64_to_v2i4: 650; AVX1: # %bb.0: 651; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 652; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 653; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 654; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0 655; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 656; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 657; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 658; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 659; AVX1-NEXT: vmovmskps %ymm0, %eax 660; AVX1-NEXT: movl %eax, %ecx 661; AVX1-NEXT: shrb $4, %cl 662; AVX1-NEXT: andb $15, %al 663; AVX1-NEXT: addb %cl, %al 664; AVX1-NEXT: # kill: def $al killed $al killed $eax 665; AVX1-NEXT: vzeroupper 666; AVX1-NEXT: retq 667; 668; AVX2-LABEL: bitcast_v8i64_to_v2i4: 669; AVX2: # %bb.0: 670; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 671; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 672; AVX2-NEXT: vmovmskps %ymm0, %eax 673; AVX2-NEXT: movl %eax, %ecx 674; AVX2-NEXT: shrb $4, %cl 675; AVX2-NEXT: andb $15, %al 676; AVX2-NEXT: addb %cl, %al 677; AVX2-NEXT: # kill: def $al killed $al killed $eax 678; AVX2-NEXT: vzeroupper 679; AVX2-NEXT: retq 680; 681; AVX512-LABEL: bitcast_v8i64_to_v2i4: 682; AVX512: # %bb.0: 683; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1 684; AVX512-NEXT: vpcmpgtq %zmm0, %zmm1, %k0 685; AVX512-NEXT: kmovd %k0, %eax 686; AVX512-NEXT: movl %eax, %ecx 687; AVX512-NEXT: shrb $4, %cl 688; AVX512-NEXT: andb $15, %al 689; AVX512-NEXT: addb %cl, %al 690; AVX512-NEXT: # kill: def $al 
killed $al killed $eax 691; AVX512-NEXT: vzeroupper 692; AVX512-NEXT: retq 693 %1 = icmp slt <8 x i64> %a0, zeroinitializer 694 %2 = bitcast <8 x i1> %1 to <2 x i4> 695 %3 = extractelement <2 x i4> %2, i32 0 696 %4 = extractelement <2 x i4> %2, i32 1 697 %5 = add i4 %3, %4 698 ret i4 %5 699} 700 701define i1 @trunc_v8i64_cmp(<8 x i64> %a0) nounwind { 702; SSE2-SSSE3-LABEL: trunc_v8i64_cmp: 703; SSE2-SSSE3: # %bb.0: 704; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2] 705; SSE2-SSSE3-NEXT: pslld $16, %xmm2 706; SSE2-SSSE3-NEXT: psrad $16, %xmm2 707; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] 708; SSE2-SSSE3-NEXT: pslld $16, %xmm0 709; SSE2-SSSE3-NEXT: psrad $16, %xmm0 710; SSE2-SSSE3-NEXT: packssdw %xmm2, %xmm0 711; SSE2-SSSE3-NEXT: psllw $15, %xmm0 712; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm0 713; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax 714; SSE2-SSSE3-NEXT: cmpb $-1, %al 715; SSE2-SSSE3-NEXT: sete %al 716; SSE2-SSSE3-NEXT: retq 717; 718; SSE41-LABEL: trunc_v8i64_cmp: 719; SSE41: # %bb.0: 720; SSE41-NEXT: pand %xmm3, %xmm1 721; SSE41-NEXT: pand %xmm2, %xmm0 722; SSE41-NEXT: pand %xmm1, %xmm0 723; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 724; SSE41-NEXT: setb %al 725; SSE41-NEXT: retq 726; 727; AVX1-LABEL: trunc_v8i64_cmp: 728; AVX1: # %bb.0: 729; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0 730; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0 731; AVX1-NEXT: setb %al 732; AVX1-NEXT: vzeroupper 733; AVX1-NEXT: retq 734; 735; AVX2-LABEL: trunc_v8i64_cmp: 736; AVX2: # %bb.0: 737; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 738; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1] 739; AVX2-NEXT: vptest %ymm1, %ymm0 740; AVX2-NEXT: setb %al 741; AVX2-NEXT: vzeroupper 742; AVX2-NEXT: retq 743; 744; AVX512-LABEL: trunc_v8i64_cmp: 745; AVX512: # %bb.0: 746; AVX512-NEXT: vpbroadcastq {{.*#+}} zmm1 = [1,1,1,1,1,1,1,1] 747; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0 748; AVX512-NEXT: vpcmpneqd %zmm1, %zmm0, %k0 749; AVX512-NEXT: kortestw %k0, %k0 
750; AVX512-NEXT: sete %al 751; AVX512-NEXT: vzeroupper 752; AVX512-NEXT: retq 753 %1 = trunc <8 x i64> %a0 to <8 x i1> 754 %2 = bitcast <8 x i1> %1 to i8 755 %3 = icmp eq i8 %2, -1 756 ret i1 %3 757} 758 759define i8 @bitcast_v16i32_to_v2i8(<16 x i32> %a0) nounwind { 760; SSE-LABEL: bitcast_v16i32_to_v2i8: 761; SSE: # %bb.0: 762; SSE-NEXT: packssdw %xmm3, %xmm2 763; SSE-NEXT: packssdw %xmm1, %xmm0 764; SSE-NEXT: packsswb %xmm2, %xmm0 765; SSE-NEXT: pmovmskb %xmm0, %ecx 766; SSE-NEXT: movl %ecx, %eax 767; SSE-NEXT: shrl $8, %eax 768; SSE-NEXT: addb %cl, %al 769; SSE-NEXT: # kill: def $al killed $al killed $eax 770; SSE-NEXT: retq 771; 772; AVX1-LABEL: bitcast_v16i32_to_v2i8: 773; AVX1: # %bb.0: 774; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 775; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 776; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 777; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 778; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 779; AVX1-NEXT: vpmovmskb %xmm0, %ecx 780; AVX1-NEXT: movl %ecx, %eax 781; AVX1-NEXT: shrl $8, %eax 782; AVX1-NEXT: addb %cl, %al 783; AVX1-NEXT: # kill: def $al killed $al killed $eax 784; AVX1-NEXT: vzeroupper 785; AVX1-NEXT: retq 786; 787; AVX2-LABEL: bitcast_v16i32_to_v2i8: 788; AVX2: # %bb.0: 789; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 790; AVX2-NEXT: vpcmpgtd %ymm1, %ymm2, %ymm1 791; AVX2-NEXT: vpcmpgtd %ymm0, %ymm2, %ymm0 792; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 793; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 794; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 795; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] 796; AVX2-NEXT: vpmovmskb %xmm0, %ecx 797; AVX2-NEXT: movl %ecx, %eax 798; AVX2-NEXT: shrl $8, %eax 799; AVX2-NEXT: addb %cl, %al 800; AVX2-NEXT: # kill: def $al killed $al killed $eax 801; AVX2-NEXT: vzeroupper 802; AVX2-NEXT: retq 803; 804; AVX512-LABEL: bitcast_v16i32_to_v2i8: 805; AVX512: # %bb.0: 806; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1 807; AVX512-NEXT: vpcmpgtd %zmm0, %zmm1, %k0 808; AVX512-NEXT: kshiftrw $8, %k0, %k1 809; 
AVX512-NEXT: kmovd %k0, %ecx 810; AVX512-NEXT: kmovd %k1, %eax 811; AVX512-NEXT: addb %cl, %al 812; AVX512-NEXT: # kill: def $al killed $al killed $eax 813; AVX512-NEXT: vzeroupper 814; AVX512-NEXT: retq 815 %1 = icmp slt <16 x i32> %a0, zeroinitializer 816 %2 = bitcast <16 x i1> %1 to <2 x i8> 817 %3 = extractelement <2 x i8> %2, i32 0 818 %4 = extractelement <2 x i8> %2, i32 1 819 %5 = add i8 %3, %4 820 ret i8 %5 821} 822 823define i1 @trunc_v16i32_cmp(<16 x i32> %a0) nounwind { 824; SSE2-SSSE3-LABEL: trunc_v16i32_cmp: 825; SSE2-SSSE3: # %bb.0: 826; SSE2-SSSE3-NEXT: por %xmm3, %xmm1 827; SSE2-SSSE3-NEXT: por %xmm2, %xmm0 828; SSE2-SSSE3-NEXT: por %xmm1, %xmm0 829; SSE2-SSSE3-NEXT: pslld $31, %xmm0 830; SSE2-SSSE3-NEXT: movmskps %xmm0, %eax 831; SSE2-SSSE3-NEXT: testl %eax, %eax 832; SSE2-SSSE3-NEXT: sete %al 833; SSE2-SSSE3-NEXT: retq 834; 835; SSE41-LABEL: trunc_v16i32_cmp: 836; SSE41: # %bb.0: 837; SSE41-NEXT: por %xmm3, %xmm1 838; SSE41-NEXT: por %xmm2, %xmm0 839; SSE41-NEXT: por %xmm1, %xmm0 840; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 841; SSE41-NEXT: sete %al 842; SSE41-NEXT: retq 843; 844; AVX1-LABEL: trunc_v16i32_cmp: 845; AVX1: # %bb.0: 846; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 847; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0 848; AVX1-NEXT: sete %al 849; AVX1-NEXT: vzeroupper 850; AVX1-NEXT: retq 851; 852; AVX2-LABEL: trunc_v16i32_cmp: 853; AVX2: # %bb.0: 854; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 855; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4294967297,4294967297,4294967297,4294967297] 856; AVX2-NEXT: vptest %ymm1, %ymm0 857; AVX2-NEXT: sete %al 858; AVX2-NEXT: vzeroupper 859; AVX2-NEXT: retq 860; 861; AVX512-LABEL: trunc_v16i32_cmp: 862; AVX512: # %bb.0: 863; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0 864; AVX512-NEXT: kortestw %k0, %k0 865; AVX512-NEXT: sete %al 866; AVX512-NEXT: vzeroupper 867; AVX512-NEXT: retq 868 %1 = trunc <16 x i32> %a0 to <16 x i1> 869 %2 = bitcast <16 x i1> %1 to i16 870 
%3 = icmp eq i16 %2, 0 871 ret i1 %3 872} 873 874define i16 @bitcast_v32i16_to_v2i16(<32 x i16> %a0) nounwind { 875; SSE-LABEL: bitcast_v32i16_to_v2i16: 876; SSE: # %bb.0: 877; SSE-NEXT: packsswb %xmm3, %xmm2 878; SSE-NEXT: pmovmskb %xmm2, %ecx 879; SSE-NEXT: packsswb %xmm1, %xmm0 880; SSE-NEXT: pmovmskb %xmm0, %eax 881; SSE-NEXT: addl %ecx, %eax 882; SSE-NEXT: # kill: def $ax killed $ax killed $eax 883; SSE-NEXT: retq 884; 885; AVX1-LABEL: bitcast_v32i16_to_v2i16: 886; AVX1: # %bb.0: 887; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 888; AVX1-NEXT: vpacksswb %xmm2, %xmm1, %xmm1 889; AVX1-NEXT: vpmovmskb %xmm1, %ecx 890; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 891; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 892; AVX1-NEXT: vpmovmskb %xmm0, %eax 893; AVX1-NEXT: addl %ecx, %eax 894; AVX1-NEXT: # kill: def $ax killed $ax killed $eax 895; AVX1-NEXT: vzeroupper 896; AVX1-NEXT: retq 897; 898; AVX2-LABEL: bitcast_v32i16_to_v2i16: 899; AVX2: # %bb.0: 900; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 901; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 902; AVX2-NEXT: vpmovmskb %ymm0, %ecx 903; AVX2-NEXT: movl %ecx, %eax 904; AVX2-NEXT: shrl $16, %eax 905; AVX2-NEXT: addl %ecx, %eax 906; AVX2-NEXT: # kill: def $ax killed $ax killed $eax 907; AVX2-NEXT: vzeroupper 908; AVX2-NEXT: retq 909; 910; AVX512-LABEL: bitcast_v32i16_to_v2i16: 911; AVX512: # %bb.0: 912; AVX512-NEXT: vpmovw2m %zmm0, %k0 913; AVX512-NEXT: kshiftrd $16, %k0, %k1 914; AVX512-NEXT: kmovd %k0, %ecx 915; AVX512-NEXT: kmovd %k1, %eax 916; AVX512-NEXT: addl %ecx, %eax 917; AVX512-NEXT: # kill: def $ax killed $ax killed $eax 918; AVX512-NEXT: vzeroupper 919; AVX512-NEXT: retq 920 %1 = icmp slt <32 x i16> %a0, zeroinitializer 921 %2 = bitcast <32 x i1> %1 to <2 x i16> 922 %3 = extractelement <2 x i16> %2, i32 0 923 %4 = extractelement <2 x i16> %2, i32 1 924 %5 = add i16 %3, %4 925 ret i16 %5 926} 927 928define i1 @trunc_v32i16_cmp(<32 x i16> %a0) nounwind { 929; SSE2-SSSE3-LABEL: trunc_v32i16_cmp: 930; SSE2-SSSE3: # 
%bb.0: 931; SSE2-SSSE3-NEXT: pand %xmm3, %xmm1 932; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0 933; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0 934; SSE2-SSSE3-NEXT: psllw $7, %xmm0 935; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax 936; SSE2-SSSE3-NEXT: notl %eax 937; SSE2-SSSE3-NEXT: testl $21845, %eax # imm = 0x5555 938; SSE2-SSSE3-NEXT: setne %al 939; SSE2-SSSE3-NEXT: retq 940; 941; SSE41-LABEL: trunc_v32i16_cmp: 942; SSE41: # %bb.0: 943; SSE41-NEXT: pand %xmm3, %xmm1 944; SSE41-NEXT: pand %xmm2, %xmm0 945; SSE41-NEXT: pand %xmm1, %xmm0 946; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 947; SSE41-NEXT: setae %al 948; SSE41-NEXT: retq 949; 950; AVX1-LABEL: trunc_v32i16_cmp: 951; AVX1: # %bb.0: 952; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0 953; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0 954; AVX1-NEXT: setae %al 955; AVX1-NEXT: vzeroupper 956; AVX1-NEXT: retq 957; 958; AVX2-LABEL: trunc_v32i16_cmp: 959; AVX2: # %bb.0: 960; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 961; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [281479271743489,281479271743489,281479271743489,281479271743489] 962; AVX2-NEXT: vptest %ymm1, %ymm0 963; AVX2-NEXT: setae %al 964; AVX2-NEXT: vzeroupper 965; AVX2-NEXT: retq 966; 967; AVX512-LABEL: trunc_v32i16_cmp: 968; AVX512: # %bb.0: 969; AVX512-NEXT: vpbroadcastw {{.*#+}} zmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] 970; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0 971; AVX512-NEXT: vpcmpneqd %zmm1, %zmm0, %k0 972; AVX512-NEXT: kortestw %k0, %k0 973; AVX512-NEXT: setne %al 974; AVX512-NEXT: vzeroupper 975; AVX512-NEXT: retq 976 %1 = trunc <32 x i16> %a0 to <32 x i1> 977 %2 = bitcast <32 x i1> %1 to i32 978 %3 = icmp ne i32 %2, -1 979 ret i1 %3 980} 981 982define i32 @bitcast_v64i8_to_v2i32(<64 x i8> %a0) nounwind { 983; SSE2-SSSE3-LABEL: bitcast_v64i8_to_v2i32: 984; SSE2-SSSE3: # %bb.0: 985; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax 986; SSE2-SSSE3-NEXT: pmovmskb %xmm1, %ecx 987; SSE2-SSSE3-NEXT: shll $16, %ecx 988; SSE2-SSSE3-NEXT: orl %eax, 
%ecx 989; SSE2-SSSE3-NEXT: pmovmskb %xmm2, %eax 990; SSE2-SSSE3-NEXT: pmovmskb %xmm3, %edx 991; SSE2-SSSE3-NEXT: shll $16, %edx 992; SSE2-SSSE3-NEXT: orl %eax, %edx 993; SSE2-SSSE3-NEXT: shlq $32, %rdx 994; SSE2-SSSE3-NEXT: orq %rcx, %rdx 995; SSE2-SSSE3-NEXT: movq %rdx, %xmm0 996; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 997; SSE2-SSSE3-NEXT: movd %xmm0, %eax 998; SSE2-SSSE3-NEXT: addl %ecx, %eax 999; SSE2-SSSE3-NEXT: retq 1000; 1001; SSE41-LABEL: bitcast_v64i8_to_v2i32: 1002; SSE41: # %bb.0: 1003; SSE41-NEXT: pmovmskb %xmm2, %eax 1004; SSE41-NEXT: pmovmskb %xmm3, %ecx 1005; SSE41-NEXT: shll $16, %ecx 1006; SSE41-NEXT: orl %eax, %ecx 1007; SSE41-NEXT: pmovmskb %xmm0, %edx 1008; SSE41-NEXT: pmovmskb %xmm1, %eax 1009; SSE41-NEXT: shll $16, %eax 1010; SSE41-NEXT: orl %edx, %eax 1011; SSE41-NEXT: addl %ecx, %eax 1012; SSE41-NEXT: retq 1013; 1014; AVX1-LABEL: bitcast_v64i8_to_v2i32: 1015; AVX1: # %bb.0: 1016; AVX1-NEXT: vpmovmskb %xmm1, %eax 1017; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 1018; AVX1-NEXT: vpmovmskb %xmm1, %ecx 1019; AVX1-NEXT: shll $16, %ecx 1020; AVX1-NEXT: orl %eax, %ecx 1021; AVX1-NEXT: vpmovmskb %xmm0, %edx 1022; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1023; AVX1-NEXT: vpmovmskb %xmm0, %eax 1024; AVX1-NEXT: shll $16, %eax 1025; AVX1-NEXT: orl %edx, %eax 1026; AVX1-NEXT: addl %ecx, %eax 1027; AVX1-NEXT: vzeroupper 1028; AVX1-NEXT: retq 1029; 1030; AVX2-LABEL: bitcast_v64i8_to_v2i32: 1031; AVX2: # %bb.0: 1032; AVX2-NEXT: vpmovmskb %ymm1, %ecx 1033; AVX2-NEXT: vpmovmskb %ymm0, %eax 1034; AVX2-NEXT: addl %ecx, %eax 1035; AVX2-NEXT: vzeroupper 1036; AVX2-NEXT: retq 1037; 1038; AVX512-LABEL: bitcast_v64i8_to_v2i32: 1039; AVX512: # %bb.0: 1040; AVX512-NEXT: vpmovb2m %zmm0, %k0 1041; AVX512-NEXT: kshiftrq $32, %k0, %k1 1042; AVX512-NEXT: kmovd %k0, %ecx 1043; AVX512-NEXT: kmovd %k1, %eax 1044; AVX512-NEXT: addl %ecx, %eax 1045; AVX512-NEXT: vzeroupper 1046; AVX512-NEXT: retq 1047 %1 = icmp slt <64 x i8> %a0, zeroinitializer 1048 %2 = bitcast 
<64 x i1> %1 to <2 x i32> 1049 %3 = extractelement <2 x i32> %2, i32 0 1050 %4 = extractelement <2 x i32> %2, i32 1 1051 %5 = add i32 %3, %4 1052 ret i32 %5 1053} 1054 1055define i1 @trunc_v64i8_cmp(<64 x i8> %a0) nounwind { 1056; SSE2-SSSE3-LABEL: trunc_v64i8_cmp: 1057; SSE2-SSSE3: # %bb.0: 1058; SSE2-SSSE3-NEXT: por %xmm3, %xmm1 1059; SSE2-SSSE3-NEXT: por %xmm2, %xmm0 1060; SSE2-SSSE3-NEXT: por %xmm1, %xmm0 1061; SSE2-SSSE3-NEXT: psllw $7, %xmm0 1062; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax 1063; SSE2-SSSE3-NEXT: testl %eax, %eax 1064; SSE2-SSSE3-NEXT: setne %al 1065; SSE2-SSSE3-NEXT: retq 1066; 1067; SSE41-LABEL: trunc_v64i8_cmp: 1068; SSE41: # %bb.0: 1069; SSE41-NEXT: por %xmm3, %xmm1 1070; SSE41-NEXT: por %xmm2, %xmm0 1071; SSE41-NEXT: por %xmm1, %xmm0 1072; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1073; SSE41-NEXT: setne %al 1074; SSE41-NEXT: retq 1075; 1076; AVX1-LABEL: trunc_v64i8_cmp: 1077; AVX1: # %bb.0: 1078; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 1079; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0 1080; AVX1-NEXT: setne %al 1081; AVX1-NEXT: vzeroupper 1082; AVX1-NEXT: retq 1083; 1084; AVX2-LABEL: trunc_v64i8_cmp: 1085; AVX2: # %bb.0: 1086; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 1087; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [72340172838076673,72340172838076673,72340172838076673,72340172838076673] 1088; AVX2-NEXT: vptest %ymm1, %ymm0 1089; AVX2-NEXT: setne %al 1090; AVX2-NEXT: vzeroupper 1091; AVX2-NEXT: retq 1092; 1093; AVX512-LABEL: trunc_v64i8_cmp: 1094; AVX512: # %bb.0: 1095; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0 1096; AVX512-NEXT: kortestw %k0, %k0 1097; AVX512-NEXT: setne %al 1098; AVX512-NEXT: vzeroupper 1099; AVX512-NEXT: retq 1100 %1 = trunc <64 x i8> %a0 to <64 x i1> 1101 %2 = bitcast <64 x i1> %1 to i64 1102 %3 = icmp ne i64 %2, 0 1103 ret i1 %3 1104} 1105 1106define i64 @bitcast_v128i8_to_v2i64(<128 x i8> %a0) nounwind { 1107; SSE-LABEL: bitcast_v128i8_to_v2i64: 1108; SSE: # %bb.0: 1109; 
SSE-NEXT: pmovmskb %xmm4, %eax 1110; SSE-NEXT: pmovmskb %xmm5, %ecx 1111; SSE-NEXT: shll $16, %ecx 1112; SSE-NEXT: orl %eax, %ecx 1113; SSE-NEXT: pmovmskb %xmm6, %eax 1114; SSE-NEXT: pmovmskb %xmm7, %edx 1115; SSE-NEXT: shll $16, %edx 1116; SSE-NEXT: orl %eax, %edx 1117; SSE-NEXT: shlq $32, %rdx 1118; SSE-NEXT: orq %rcx, %rdx 1119; SSE-NEXT: pmovmskb %xmm0, %eax 1120; SSE-NEXT: pmovmskb %xmm1, %ecx 1121; SSE-NEXT: shll $16, %ecx 1122; SSE-NEXT: orl %eax, %ecx 1123; SSE-NEXT: pmovmskb %xmm2, %esi 1124; SSE-NEXT: pmovmskb %xmm3, %eax 1125; SSE-NEXT: shll $16, %eax 1126; SSE-NEXT: orl %esi, %eax 1127; SSE-NEXT: shlq $32, %rax 1128; SSE-NEXT: orq %rcx, %rax 1129; SSE-NEXT: addq %rdx, %rax 1130; SSE-NEXT: retq 1131; 1132; AVX1-LABEL: bitcast_v128i8_to_v2i64: 1133; AVX1: # %bb.0: 1134; AVX1-NEXT: vpmovmskb %xmm2, %eax 1135; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2 1136; AVX1-NEXT: vpmovmskb %xmm2, %edx 1137; AVX1-NEXT: shll $16, %edx 1138; AVX1-NEXT: orl %eax, %edx 1139; AVX1-NEXT: vpmovmskb %xmm3, %eax 1140; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2 1141; AVX1-NEXT: vpmovmskb %xmm2, %ecx 1142; AVX1-NEXT: shll $16, %ecx 1143; AVX1-NEXT: orl %eax, %ecx 1144; AVX1-NEXT: shlq $32, %rcx 1145; AVX1-NEXT: orq %rdx, %rcx 1146; AVX1-NEXT: vpmovmskb %xmm0, %eax 1147; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1148; AVX1-NEXT: vpmovmskb %xmm0, %edx 1149; AVX1-NEXT: shll $16, %edx 1150; AVX1-NEXT: orl %eax, %edx 1151; AVX1-NEXT: vpmovmskb %xmm1, %esi 1152; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0 1153; AVX1-NEXT: vpmovmskb %xmm0, %eax 1154; AVX1-NEXT: shll $16, %eax 1155; AVX1-NEXT: orl %esi, %eax 1156; AVX1-NEXT: shlq $32, %rax 1157; AVX1-NEXT: orq %rdx, %rax 1158; AVX1-NEXT: addq %rcx, %rax 1159; AVX1-NEXT: vzeroupper 1160; AVX1-NEXT: retq 1161; 1162; AVX2-LABEL: bitcast_v128i8_to_v2i64: 1163; AVX2: # %bb.0: 1164; AVX2-NEXT: vpmovmskb %ymm3, %eax 1165; AVX2-NEXT: shlq $32, %rax 1166; AVX2-NEXT: vpmovmskb %ymm2, %ecx 1167; AVX2-NEXT: orq %rax, %rcx 1168; AVX2-NEXT: vpmovmskb %ymm1, 
%edx 1169; AVX2-NEXT: shlq $32, %rdx 1170; AVX2-NEXT: vpmovmskb %ymm0, %eax 1171; AVX2-NEXT: orq %rdx, %rax 1172; AVX2-NEXT: addq %rcx, %rax 1173; AVX2-NEXT: vzeroupper 1174; AVX2-NEXT: retq 1175; 1176; AVX512-LABEL: bitcast_v128i8_to_v2i64: 1177; AVX512: # %bb.0: 1178; AVX512-NEXT: vpmovb2m %zmm1, %k0 1179; AVX512-NEXT: kmovq %k0, %rcx 1180; AVX512-NEXT: vpmovb2m %zmm0, %k0 1181; AVX512-NEXT: kmovq %k0, %rax 1182; AVX512-NEXT: addq %rcx, %rax 1183; AVX512-NEXT: vzeroupper 1184; AVX512-NEXT: retq 1185 %1 = icmp slt <128 x i8> %a0, zeroinitializer 1186 %2 = bitcast <128 x i1> %1 to <2 x i64> 1187 %3 = extractelement <2 x i64> %2, i32 0 1188 %4 = extractelement <2 x i64> %2, i32 1 1189 %5 = add i64 %3, %4 1190 ret i64 %5 1191} 1192 1193define i1 @trunc_v128i8_cmp(<128 x i8> %a0) nounwind { 1194; SSE2-SSSE3-LABEL: trunc_v128i8_cmp: 1195; SSE2-SSSE3: # %bb.0: 1196; SSE2-SSSE3-NEXT: psllw $7, %xmm0 1197; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax 1198; SSE2-SSSE3-NEXT: psllw $7, %xmm1 1199; SSE2-SSSE3-NEXT: pmovmskb %xmm1, %ecx 1200; SSE2-SSSE3-NEXT: shll $16, %ecx 1201; SSE2-SSSE3-NEXT: orl %eax, %ecx 1202; SSE2-SSSE3-NEXT: psllw $7, %xmm2 1203; SSE2-SSSE3-NEXT: pmovmskb %xmm2, %edx 1204; SSE2-SSSE3-NEXT: psllw $7, %xmm3 1205; SSE2-SSSE3-NEXT: pmovmskb %xmm3, %eax 1206; SSE2-SSSE3-NEXT: shll $16, %eax 1207; SSE2-SSSE3-NEXT: orl %edx, %eax 1208; SSE2-SSSE3-NEXT: shlq $32, %rax 1209; SSE2-SSSE3-NEXT: orq %rcx, %rax 1210; SSE2-SSSE3-NEXT: psllw $7, %xmm4 1211; SSE2-SSSE3-NEXT: pmovmskb %xmm4, %ecx 1212; SSE2-SSSE3-NEXT: psllw $7, %xmm5 1213; SSE2-SSSE3-NEXT: pmovmskb %xmm5, %edx 1214; SSE2-SSSE3-NEXT: shll $16, %edx 1215; SSE2-SSSE3-NEXT: orl %ecx, %edx 1216; SSE2-SSSE3-NEXT: psllw $7, %xmm6 1217; SSE2-SSSE3-NEXT: pmovmskb %xmm6, %ecx 1218; SSE2-SSSE3-NEXT: psllw $7, %xmm7 1219; SSE2-SSSE3-NEXT: pmovmskb %xmm7, %esi 1220; SSE2-SSSE3-NEXT: shll $16, %esi 1221; SSE2-SSSE3-NEXT: orl %ecx, %esi 1222; SSE2-SSSE3-NEXT: shlq $32, %rsi 1223; SSE2-SSSE3-NEXT: orq %rdx, %rsi 1224; 
SSE2-SSSE3-NEXT: movq %rsi, %xmm0 1225; SSE2-SSSE3-NEXT: movq %rax, %xmm1 1226; SSE2-SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] 1227; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm0 1228; SSE2-SSSE3-NEXT: pcmpeqb %xmm1, %xmm0 1229; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax 1230; SSE2-SSSE3-NEXT: cmpl $65535, %eax # imm = 0xFFFF 1231; SSE2-SSSE3-NEXT: setne %al 1232; SSE2-SSSE3-NEXT: retq 1233; 1234; SSE41-LABEL: trunc_v128i8_cmp: 1235; SSE41: # %bb.0: 1236; SSE41-NEXT: psllw $7, %xmm0 1237; SSE41-NEXT: pmovmskb %xmm0, %eax 1238; SSE41-NEXT: psllw $7, %xmm1 1239; SSE41-NEXT: pmovmskb %xmm1, %ecx 1240; SSE41-NEXT: shll $16, %ecx 1241; SSE41-NEXT: orl %eax, %ecx 1242; SSE41-NEXT: psllw $7, %xmm2 1243; SSE41-NEXT: pmovmskb %xmm2, %edx 1244; SSE41-NEXT: psllw $7, %xmm3 1245; SSE41-NEXT: pmovmskb %xmm3, %eax 1246; SSE41-NEXT: shll $16, %eax 1247; SSE41-NEXT: orl %edx, %eax 1248; SSE41-NEXT: shlq $32, %rax 1249; SSE41-NEXT: orq %rcx, %rax 1250; SSE41-NEXT: psllw $7, %xmm4 1251; SSE41-NEXT: pmovmskb %xmm4, %ecx 1252; SSE41-NEXT: psllw $7, %xmm5 1253; SSE41-NEXT: pmovmskb %xmm5, %edx 1254; SSE41-NEXT: shll $16, %edx 1255; SSE41-NEXT: orl %ecx, %edx 1256; SSE41-NEXT: psllw $7, %xmm6 1257; SSE41-NEXT: pmovmskb %xmm6, %ecx 1258; SSE41-NEXT: psllw $7, %xmm7 1259; SSE41-NEXT: pmovmskb %xmm7, %esi 1260; SSE41-NEXT: shll $16, %esi 1261; SSE41-NEXT: orl %ecx, %esi 1262; SSE41-NEXT: shlq $32, %rsi 1263; SSE41-NEXT: orq %rdx, %rsi 1264; SSE41-NEXT: movq %rsi, %xmm0 1265; SSE41-NEXT: movq %rax, %xmm1 1266; SSE41-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] 1267; SSE41-NEXT: pcmpeqd %xmm0, %xmm0 1268; SSE41-NEXT: ptest %xmm0, %xmm1 1269; SSE41-NEXT: setae %al 1270; SSE41-NEXT: retq 1271; 1272; AVX1-LABEL: trunc_v128i8_cmp: 1273; AVX1: # %bb.0: 1274; AVX1-NEXT: vpsllw $7, %xmm0, %xmm4 1275; AVX1-NEXT: vpmovmskb %xmm4, %eax 1276; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1277; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0 1278; AVX1-NEXT: vpmovmskb %xmm0, %ecx 1279; AVX1-NEXT: shll $16, %ecx 1280; 
AVX1-NEXT: orl %eax, %ecx 1281; AVX1-NEXT: vpsllw $7, %xmm1, %xmm0 1282; AVX1-NEXT: vpmovmskb %xmm0, %edx 1283; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0 1284; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0 1285; AVX1-NEXT: vpmovmskb %xmm0, %eax 1286; AVX1-NEXT: shll $16, %eax 1287; AVX1-NEXT: orl %edx, %eax 1288; AVX1-NEXT: shlq $32, %rax 1289; AVX1-NEXT: orq %rcx, %rax 1290; AVX1-NEXT: vpsllw $7, %xmm2, %xmm0 1291; AVX1-NEXT: vpmovmskb %xmm0, %ecx 1292; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm0 1293; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0 1294; AVX1-NEXT: vpmovmskb %xmm0, %edx 1295; AVX1-NEXT: shll $16, %edx 1296; AVX1-NEXT: orl %ecx, %edx 1297; AVX1-NEXT: vpsllw $7, %xmm3, %xmm0 1298; AVX1-NEXT: vpmovmskb %xmm0, %ecx 1299; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm0 1300; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0 1301; AVX1-NEXT: vpmovmskb %xmm0, %esi 1302; AVX1-NEXT: shll $16, %esi 1303; AVX1-NEXT: orl %ecx, %esi 1304; AVX1-NEXT: shlq $32, %rsi 1305; AVX1-NEXT: orq %rdx, %rsi 1306; AVX1-NEXT: vmovq %rsi, %xmm0 1307; AVX1-NEXT: vmovq %rax, %xmm1 1308; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1309; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 1310; AVX1-NEXT: vptest %xmm1, %xmm0 1311; AVX1-NEXT: setae %al 1312; AVX1-NEXT: vzeroupper 1313; AVX1-NEXT: retq 1314; 1315; AVX2-LABEL: trunc_v128i8_cmp: 1316; AVX2: # %bb.0: 1317; AVX2-NEXT: vpsllw $7, %ymm1, %ymm1 1318; AVX2-NEXT: vpmovmskb %ymm1, %eax 1319; AVX2-NEXT: shlq $32, %rax 1320; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0 1321; AVX2-NEXT: vpmovmskb %ymm0, %ecx 1322; AVX2-NEXT: orq %rax, %rcx 1323; AVX2-NEXT: vpsllw $7, %ymm3, %ymm0 1324; AVX2-NEXT: vpmovmskb %ymm0, %eax 1325; AVX2-NEXT: shlq $32, %rax 1326; AVX2-NEXT: vpsllw $7, %ymm2, %ymm0 1327; AVX2-NEXT: vpmovmskb %ymm0, %edx 1328; AVX2-NEXT: orq %rax, %rdx 1329; AVX2-NEXT: vmovq %rdx, %xmm0 1330; AVX2-NEXT: vmovq %rcx, %xmm1 1331; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1332; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 1333; AVX2-NEXT: vptest %xmm1, %xmm0 1334; AVX2-NEXT: 
setae %al 1335; AVX2-NEXT: vzeroupper 1336; AVX2-NEXT: retq 1337; 1338; AVX512-LABEL: trunc_v128i8_cmp: 1339; AVX512: # %bb.0: 1340; AVX512-NEXT: vpsllw $7, %zmm0, %zmm0 1341; AVX512-NEXT: vpmovb2m %zmm0, %k0 1342; AVX512-NEXT: kmovq %k0, %rax 1343; AVX512-NEXT: vpsllw $7, %zmm1, %zmm0 1344; AVX512-NEXT: vpmovb2m %zmm0, %k0 1345; AVX512-NEXT: kmovq %k0, %rcx 1346; AVX512-NEXT: vmovq %rcx, %xmm0 1347; AVX512-NEXT: vmovq %rax, %xmm1 1348; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1349; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 1350; AVX512-NEXT: vptest %xmm1, %xmm0 1351; AVX512-NEXT: setae %al 1352; AVX512-NEXT: vzeroupper 1353; AVX512-NEXT: retq 1354 %1 = trunc <128 x i8> %a0 to <128 x i1> 1355 %2 = bitcast <128 x i1> %1 to i128 1356 %3 = icmp ne i128 %2, -1 1357 ret i1 %3 1358} 1359 1360define [2 x i8] @PR58546(<16 x float> %a0) { 1361; SSE-LABEL: PR58546: 1362; SSE: # %bb.0: 1363; SSE-NEXT: xorps %xmm4, %xmm4 1364; SSE-NEXT: cmpunordps %xmm4, %xmm3 1365; SSE-NEXT: cmpunordps %xmm4, %xmm2 1366; SSE-NEXT: packssdw %xmm3, %xmm2 1367; SSE-NEXT: cmpunordps %xmm4, %xmm1 1368; SSE-NEXT: cmpunordps %xmm4, %xmm0 1369; SSE-NEXT: packssdw %xmm1, %xmm0 1370; SSE-NEXT: packsswb %xmm2, %xmm0 1371; SSE-NEXT: pmovmskb %xmm0, %eax 1372; SSE-NEXT: movl %eax, %edx 1373; SSE-NEXT: shrl $8, %edx 1374; SSE-NEXT: # kill: def $al killed $al killed $eax 1375; SSE-NEXT: # kill: def $dl killed $dl killed $edx 1376; SSE-NEXT: retq 1377; 1378; AVX1-LABEL: PR58546: 1379; AVX1: # %bb.0: 1380; AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2 1381; AVX1-NEXT: vcmpunordps %ymm2, %ymm1, %ymm1 1382; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 1383; AVX1-NEXT: vpackssdw %xmm3, %xmm1, %xmm1 1384; AVX1-NEXT: vcmpunordps %ymm2, %ymm0, %ymm0 1385; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1386; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 1387; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 1388; AVX1-NEXT: vpmovmskb %xmm0, %eax 1389; AVX1-NEXT: movl %eax, %edx 1390; AVX1-NEXT: shrl $8, %edx 1391; AVX1-NEXT: # 
kill: def $al killed $al killed $eax 1392; AVX1-NEXT: # kill: def $dl killed $dl killed $edx 1393; AVX1-NEXT: vzeroupper 1394; AVX1-NEXT: retq 1395; 1396; AVX2-LABEL: PR58546: 1397; AVX2: # %bb.0: 1398; AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2 1399; AVX2-NEXT: vcmpunordps %ymm2, %ymm1, %ymm1 1400; AVX2-NEXT: vcmpunordps %ymm2, %ymm0, %ymm0 1401; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 1402; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1403; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 1404; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] 1405; AVX2-NEXT: vpmovmskb %xmm0, %eax 1406; AVX2-NEXT: movl %eax, %edx 1407; AVX2-NEXT: shrl $8, %edx 1408; AVX2-NEXT: # kill: def $al killed $al killed $eax 1409; AVX2-NEXT: # kill: def $dl killed $dl killed $edx 1410; AVX2-NEXT: vzeroupper 1411; AVX2-NEXT: retq 1412; 1413; AVX512-LABEL: PR58546: 1414; AVX512: # %bb.0: 1415; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 1416; AVX512-NEXT: vcmpunordps %zmm1, %zmm0, %k0 1417; AVX512-NEXT: kshiftrw $8, %k0, %k1 1418; AVX512-NEXT: kmovd %k0, %eax 1419; AVX512-NEXT: kmovd %k1, %edx 1420; AVX512-NEXT: # kill: def $al killed $al killed $eax 1421; AVX512-NEXT: # kill: def $dl killed $dl killed $edx 1422; AVX512-NEXT: vzeroupper 1423; AVX512-NEXT: retq 1424 %1 = fcmp uno <16 x float> %a0, zeroinitializer 1425 %2 = bitcast <16 x i1> %1 to <2 x i8> 1426 %3 = extractelement <2 x i8> %2, i64 0 1427 %4 = extractelement <2 x i8> %2, i64 1 1428 %5 = insertvalue [2 x i8] poison, i8 %3, 0 1429 %6 = insertvalue [2 x i8] %5, i8 %4, 1 1430 ret [2 x i8] %6 1431} 1432 1433define i8 @PR59526(<8 x i32> %a, <8 x i32> %b, ptr %mask) { 1434; SSE-LABEL: PR59526: 1435; SSE: # %bb.0: 1436; SSE-NEXT: pcmpeqd %xmm2, %xmm0 1437; SSE-NEXT: pcmpeqd %xmm3, %xmm1 1438; SSE-NEXT: movdqu (%rdi), %xmm2 1439; SSE-NEXT: pand %xmm0, %xmm2 1440; SSE-NEXT: movdqu 16(%rdi), %xmm0 1441; SSE-NEXT: pand %xmm1, %xmm0 1442; SSE-NEXT: packssdw %xmm0, %xmm2 1443; SSE-NEXT: pmovmskb %xmm2, %eax 1444; SSE-NEXT: testl $43690, %eax # imm = 0xAAAA 
1445; SSE-NEXT: setne %al 1446; SSE-NEXT: retq 1447; 1448; AVX1-LABEL: PR59526: 1449; AVX1: # %bb.0: 1450; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1451; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1452; AVX1-NEXT: vpcmpeqd %xmm2, %xmm3, %xmm2 1453; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1454; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1455; AVX1-NEXT: vtestps (%rdi), %ymm0 1456; AVX1-NEXT: setne %al 1457; AVX1-NEXT: vzeroupper 1458; AVX1-NEXT: retq 1459; 1460; AVX2-LABEL: PR59526: 1461; AVX2: # %bb.0: 1462; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 1463; AVX2-NEXT: vtestps (%rdi), %ymm0 1464; AVX2-NEXT: setne %al 1465; AVX2-NEXT: vzeroupper 1466; AVX2-NEXT: retq 1467; 1468; AVX512-LABEL: PR59526: 1469; AVX512: # %bb.0: 1470; AVX512-NEXT: vpcmpeqd %ymm1, %ymm0, %k1 1471; AVX512-NEXT: vpxor %xmm0, %xmm0, %xmm0 1472; AVX512-NEXT: vpcmpgtd (%rdi), %ymm0, %k0 {%k1} 1473; AVX512-NEXT: kmovd %k0, %eax 1474; AVX512-NEXT: testb %al, %al 1475; AVX512-NEXT: setne %al 1476; AVX512-NEXT: vzeroupper 1477; AVX512-NEXT: retq 1478 %cmp.eq = icmp eq <8 x i32> %a, %b 1479 %load = load <8 x i32>, ptr %mask, align 1 1480 %cmp.slt = icmp slt <8 x i32> %load, zeroinitializer 1481 %sel = select <8 x i1> %cmp.eq, <8 x i1> %cmp.slt, <8 x i1> zeroinitializer 1482 %bc = bitcast <8 x i1> %sel to i8 1483 %cmp = icmp ne i8 %bc, 0 1484 %conv = zext i1 %cmp to i8 1485 ret i8 %conv 1486} 1487