; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2,X86-SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2,X64-SSE,X64-SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,X64-SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VL

; Codegen tests for llvm.vector.reduce.xor applied to <N x i1> vectors.
; Every function below builds an i1 vector from its single unnamed argument
; (by truncation, or by an equality compare against zero) and returns the XOR
; of all lanes as an i1. The assertion comments inside each function are
; produced by the script named on the first line of this file; regenerate them
; with that script instead of editing them by hand.

;
; Truncate
;

; Truncates each lane of a <2 x i64> to i1 and XOR-reduces the 2 bits.
define i1 @trunc_v2i64_v2i1(<2 x i64>) nounwind {
; SSE-LABEL: trunc_v2i64_v2i1:
; SSE:       # %bb.0:
; SSE-NEXT:    psllq $63, %xmm0
; SSE-NEXT:    movmskpd %xmm0, %eax
; SSE-NEXT:    testb %al, %al
; SSE-NEXT:    setnp %al
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: trunc_v2i64_v2i1:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsllq $63, %xmm0, %xmm0
; AVX-NEXT:    vmovmskpd %xmm0, %eax
; AVX-NEXT:    testb %al, %al
; AVX-NEXT:    setnp %al
; AVX-NEXT:    retq
;
; AVX512F-LABEL: trunc_v2i64_v2i1:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpsllq $63, %xmm0, %xmm0
; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb $3, %al
; AVX512F-NEXT:    setnp %al
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: trunc_v2i64_v2i1:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsllq $63, %xmm0, %xmm0
; AVX512BW-NEXT:    vptestmq %zmm0, %zmm0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    testb $3, %al
; AVX512BW-NEXT:    setnp %al
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512VL-LABEL: trunc_v2i64_v2i1:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsllq $63, %xmm0, %xmm0
; AVX512VL-NEXT:    vptestmq %xmm0, %xmm0, %k0
; AVX512VL-NEXT:    kmovd %k0, %eax
; AVX512VL-NEXT:    testb %al, %al
; AVX512VL-NEXT:    setnp %al
; AVX512VL-NEXT:    retq
  %a = trunc <2 x i64> %0 to <2 x i1>
  %b = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> %a)
  ret i1 %b
}

; Truncates each lane of a <4 x i32> to i1 and XOR-reduces the 4 bits.
define i1 @trunc_v4i32_v4i1(<4 x i32>) nounwind {
; SSE-LABEL: trunc_v4i32_v4i1:
; SSE:       # %bb.0:
; SSE-NEXT:    pslld $31, %xmm0
; SSE-NEXT:    movmskps %xmm0, %eax
; SSE-NEXT:    testb %al, %al
; SSE-NEXT:    setnp %al
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: trunc_v4i32_v4i1:
; AVX:       # %bb.0:
; AVX-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX-NEXT:    vmovmskps %xmm0, %eax
; AVX-NEXT:    testb %al, %al
; AVX-NEXT:    setnp %al
; AVX-NEXT:    retq
;
; AVX512F-LABEL: trunc_v4i32_v4i1:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb $15, %al
; AVX512F-NEXT:    setnp %al
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: trunc_v4i32_v4i1:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX512BW-NEXT:    vptestmd %zmm0, %zmm0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    testb $15, %al
; AVX512BW-NEXT:    setnp %al
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512VL-LABEL: trunc_v4i32_v4i1:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX512VL-NEXT:    vptestmd %xmm0, %xmm0, %k0
; AVX512VL-NEXT:    kmovd %k0, %eax
; AVX512VL-NEXT:    testb %al, %al
; AVX512VL-NEXT:    setnp %al
; AVX512VL-NEXT:    retq
  %a = trunc <4 x i32> %0 to <4 x i1>
  %b = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %a)
  ret i1 %b
}

; Truncates each lane of an <8 x i16> to i1 and XOR-reduces the 8 bits.
define i1 @trunc_v8i16_v8i1(<8 x i16>) nounwind {
; SSE-LABEL: trunc_v8i16_v8i1:
; SSE:       # %bb.0:
; SSE-NEXT:    psllw $15, %xmm0
; SSE-NEXT:    packsswb %xmm0, %xmm0
; SSE-NEXT:    pmovmskb %xmm0, %eax
; SSE-NEXT:    testb %al, %al
; SSE-NEXT:    setnp %al
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: trunc_v8i16_v8i1:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsllw $15, %xmm0, %xmm0
; AVX-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vpmovmskb %xmm0, %eax
; AVX-NEXT:    testb %al, %al
; AVX-NEXT:    setnp %al
; AVX-NEXT:    retq
;
; AVX512F-LABEL: trunc_v8i16_v8i1:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
; AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    setnp %al
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: trunc_v8i16_v8i1:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsllw $15, %xmm0, %xmm0
; AVX512BW-NEXT:    vpmovw2m %zmm0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    testb %al, %al
; AVX512BW-NEXT:    setnp %al
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512VL-LABEL: trunc_v8i16_v8i1:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsllw $15, %xmm0, %xmm0
; AVX512VL-NEXT:    vpmovw2m %xmm0, %k0
; AVX512VL-NEXT:    kmovd %k0, %eax
; AVX512VL-NEXT:    testb %al, %al
; AVX512VL-NEXT:    setnp %al
; AVX512VL-NEXT:    retq
  %a = trunc <8 x i16> %0 to <8 x i1>
  %b = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a)
  ret i1 %b
}

; Truncates each lane of a <16 x i8> to i1 and XOR-reduces the 16 bits.
define i1 @trunc_v16i8_v16i1(<16 x i8>) nounwind {
; SSE-LABEL: trunc_v16i8_v16i1:
; SSE:       # %bb.0:
; SSE-NEXT:    psllw $7, %xmm0
; SSE-NEXT:    pmovmskb %xmm0, %eax
; SSE-NEXT:    xorb %ah, %al
; SSE-NEXT:    setnp %al
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: trunc_v16i8_v16i1:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsllw $7, %xmm0, %xmm0
; AVX-NEXT:    vpmovmskb %xmm0, %eax
; AVX-NEXT:    xorb %ah, %al
; AVX-NEXT:    setnp %al
; AVX-NEXT:    retq
;
; AVX512-LABEL: trunc_v16i8_v16i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsllw $7, %xmm0, %xmm0
; AVX512-NEXT:    vpmovmskb %xmm0, %eax
; AVX512-NEXT:    xorb %ah, %al
; AVX512-NEXT:    setnp %al
; AVX512-NEXT:    retq
  %a = trunc <16 x i8> %0 to <16 x i1>
  %b = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %a)
  ret i1 %b
}

; Truncates each lane of a <4 x i64> to i1 and XOR-reduces the 4 bits.
define i1 @trunc_v4i64_v4i1(<4 x i64>) nounwind {
; SSE-LABEL: trunc_v4i64_v4i1:
; SSE:       # %bb.0:
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE-NEXT:    pslld $31, %xmm0
; SSE-NEXT:    movmskps %xmm0, %eax
; SSE-NEXT:    testb %al, %al
; SSE-NEXT:    setnp %al
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX1-LABEL: trunc_v4i64_v4i1:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; AVX1-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX1-NEXT:    vmovmskps %xmm0, %eax
; AVX1-NEXT:    testb %al, %al
; AVX1-NEXT:    setnp %al
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_v4i64_v4i1:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllq $63, %ymm0, %ymm0
; AVX2-NEXT:    vmovmskpd %ymm0, %eax
; AVX2-NEXT:    testb %al, %al
; AVX2-NEXT:    setnp %al
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: trunc_v4i64_v4i1:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpsllq $63, %ymm0, %ymm0
; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb $15, %al
; AVX512F-NEXT:    setnp %al
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: trunc_v4i64_v4i1:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsllq $63, %ymm0, %ymm0
; AVX512BW-NEXT:    vptestmq %zmm0, %zmm0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    testb $15, %al
; AVX512BW-NEXT:    setnp %al
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512VL-LABEL: trunc_v4i64_v4i1:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsllq $63, %ymm0, %ymm0
; AVX512VL-NEXT:    vptestmq %ymm0, %ymm0, %k0
; AVX512VL-NEXT:    kmovd %k0, %eax
; AVX512VL-NEXT:    testb %al, %al
; AVX512VL-NEXT:    setnp %al
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
  %a = trunc <4 x i64> %0 to <4 x i1>
  %b = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %a)
  ret i1 %b
}

; Truncates each lane of an <8 x i32> to i1 and XOR-reduces the 8 bits.
define i1 @trunc_v8i32_v8i1(<8 x i32>) nounwind {
; SSE2-LABEL: trunc_v8i32_v8i1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pslld $16, %xmm1
; SSE2-NEXT:    psrad $16, %xmm1
; SSE2-NEXT:    pslld $16, %xmm0
; SSE2-NEXT:    psrad $16, %xmm0
; SSE2-NEXT:    packssdw %xmm1, %xmm0
; SSE2-NEXT:    psllw $15, %xmm0
; SSE2-NEXT:    packsswb %xmm0, %xmm0
; SSE2-NEXT:    pmovmskb %xmm0, %eax
; SSE2-NEXT:    testb %al, %al
; SSE2-NEXT:    setnp %al
; SSE2-NEXT:    ret{{[l|q]}}
;
; SSE41-LABEL: trunc_v8i32_v8i1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pxor %xmm2, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
; SSE41-NEXT:    packusdw %xmm1, %xmm0
; SSE41-NEXT:    psllw $15, %xmm0
; SSE41-NEXT:    packsswb %xmm0, %xmm0
; SSE41-NEXT:    pmovmskb %xmm0, %eax
; SSE41-NEXT:    testb %al, %al
; SSE41-NEXT:    setnp %al
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc_v8i32_v8i1:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpslld $31, %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vmovmskps %ymm0, %eax
; AVX1-NEXT:    testb %al, %al
; AVX1-NEXT:    setnp %al
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_v8i32_v8i1:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX2-NEXT:    vmovmskps %ymm0, %eax
; AVX2-NEXT:    testb %al, %al
; AVX2-NEXT:    setnp %al
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: trunc_v8i32_v8i1:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    setnp %al
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: trunc_v8i32_v8i1:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX512BW-NEXT:    vptestmd %zmm0, %zmm0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    testb %al, %al
; AVX512BW-NEXT:    setnp %al
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512VL-LABEL: trunc_v8i32_v8i1:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX512VL-NEXT:    vptestmd %ymm0, %ymm0, %k0
; AVX512VL-NEXT:    kmovd %k0, %eax
; AVX512VL-NEXT:    testb %al, %al
; AVX512VL-NEXT:    setnp %al
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
  %a = trunc <8 x i32> %0 to <8 x i1>
  %b = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a)
  ret i1 %b
}

; Truncates each lane of a <16 x i16> to i1 and XOR-reduces the 16 bits.
define i1 @trunc_v16i16_v16i1(<16 x i16>) nounwind {
; SSE2-LABEL: trunc_v16i16_v16i1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    packuswb %xmm1, %xmm0
; SSE2-NEXT:    psllw $7, %xmm0
; SSE2-NEXT:    pmovmskb %xmm0, %eax
; SSE2-NEXT:    xorb %ah, %al
; SSE2-NEXT:    setnp %al
; SSE2-NEXT:    ret{{[l|q]}}
;
; SSE41-LABEL: trunc_v16i16_v16i1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE41-NEXT:    pand %xmm2, %xmm1
; SSE41-NEXT:    pand %xmm2, %xmm0
; SSE41-NEXT:    packuswb %xmm1, %xmm0
; SSE41-NEXT:    psllw $7, %xmm0
; SSE41-NEXT:    pmovmskb %xmm0, %eax
; SSE41-NEXT:    xorb %ah, %al
; SSE41-NEXT:    setnp %al
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc_v16i16_v16i1:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsllw $7, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    xorb %ah, %al
; AVX1-NEXT:    setnp %al
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_v16i16_v16i1:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsllw $7, %xmm0, %xmm0
; AVX2-NEXT:    vpmovmskb %xmm0, %eax
; AVX2-NEXT:    xorb %ah, %al
; AVX2-NEXT:    setnp %al
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: trunc_v16i16_v16i1:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, %ecx
; AVX512F-NEXT:    shrl $8, %ecx
; AVX512F-NEXT:    xorb %al, %cl
; AVX512F-NEXT:    setnp %al
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: trunc_v16i16_v16i1:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsllw $15, %ymm0, %ymm0
; AVX512BW-NEXT:    vpmovw2m %zmm0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    movl %eax, %ecx
; AVX512BW-NEXT:    shrl $8, %ecx
; AVX512BW-NEXT:    xorb %al, %cl
; AVX512BW-NEXT:    setnp %al
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512VL-LABEL: trunc_v16i16_v16i1:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsllw $15, %ymm0, %ymm0
; AVX512VL-NEXT:    vpmovw2m %ymm0, %k0
; AVX512VL-NEXT:    kmovd %k0, %eax
; AVX512VL-NEXT:    movl %eax, %ecx
; AVX512VL-NEXT:    shrl $8, %ecx
; AVX512VL-NEXT:    xorb %al, %cl
; AVX512VL-NEXT:    setnp %al
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
  %a = trunc <16 x i16> %0 to <16 x i1>
  %b = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %a)
  ret i1 %b
}

; Truncates each lane of a <32 x i8> to i1 and XOR-reduces the 32 bits.
define i1 @trunc_v32i8_v32i1(<32 x i8>) nounwind {
; SSE-LABEL: trunc_v32i8_v32i1:
; SSE:       # %bb.0:
; SSE-NEXT:    pxor %xmm1, %xmm0
; SSE-NEXT:    psllw $7, %xmm0
; SSE-NEXT:    pmovmskb %xmm0, %eax
; SSE-NEXT:    xorb %ah, %al
; SSE-NEXT:    setnp %al
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX1-LABEL: trunc_v32i8_v32i1:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsllw $7, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    xorb %ah, %al
; AVX1-NEXT:    setnp %al
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_v32i8_v32i1:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllw $7, %ymm0, %ymm0
; AVX2-NEXT:    vpmovmskb %ymm0, %eax
; AVX2-NEXT:    movl %eax, %ecx
; AVX2-NEXT:    shrl $16, %ecx
; AVX2-NEXT:    xorl %eax, %ecx
; AVX2-NEXT:    xorb %ch, %cl
; AVX2-NEXT:    setnp %al
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: trunc_v32i8_v32i1:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT:    kshiftrw $8, %k0, %k1
; AVX512F-NEXT:    kxorw %k1, %k0, %k0
; AVX512F-NEXT:    kshiftrw $4, %k0, %k1
; AVX512F-NEXT:    kxorw %k1, %k0, %k0
; AVX512F-NEXT:    kshiftrw $2, %k0, %k1
; AVX512F-NEXT:    kxorw %k1, %k0, %k0
; AVX512F-NEXT:    kshiftrw $1, %k0, %k1
; AVX512F-NEXT:    kxorw %k1, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: trunc_v32i8_v32i1:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsllw $7, %ymm0, %ymm0
; AVX512BW-NEXT:    vpmovmskb %ymm0, %eax
; AVX512BW-NEXT:    movl %eax, %ecx
; AVX512BW-NEXT:    shrl $16, %ecx
; AVX512BW-NEXT:    xorl %eax, %ecx
; AVX512BW-NEXT:    xorb %ch, %cl
; AVX512BW-NEXT:    setnp %al
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512VL-LABEL: trunc_v32i8_v32i1:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsllw $7, %ymm0, %ymm0
; AVX512VL-NEXT:    vpmovmskb %ymm0, %eax
; AVX512VL-NEXT:    movl %eax, %ecx
; AVX512VL-NEXT:    shrl $16, %ecx
; AVX512VL-NEXT:    xorl %eax, %ecx
; AVX512VL-NEXT:    xorb %ch, %cl
; AVX512VL-NEXT:    setnp %al
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
  %a = trunc <32 x i8> %0 to <32 x i1>
  %b = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> %a)
  ret i1 %b
}

; Truncates each lane of an <8 x i64> to i1 and XOR-reduces the 8 bits.
define i1 @trunc_v8i64_v8i1(<8 x i64>) nounwind {
; X86-SSE2-LABEL: trunc_v8i64_v8i1:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %ebp
; X86-SSE2-NEXT:    movl %esp, %ebp
; X86-SSE2-NEXT:    andl $-16, %esp
; X86-SSE2-NEXT:    subl $16, %esp
; X86-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X86-SSE2-NEXT:    pslld $16, %xmm0
; X86-SSE2-NEXT:    psrad $16, %xmm0
; X86-SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],mem[0,2]
; X86-SSE2-NEXT:    pslld $16, %xmm2
; X86-SSE2-NEXT:    psrad $16, %xmm2
; X86-SSE2-NEXT:    packssdw %xmm2, %xmm0
; X86-SSE2-NEXT:    psllw $15, %xmm0
; X86-SSE2-NEXT:    packsswb %xmm0, %xmm0
; X86-SSE2-NEXT:    pmovmskb %xmm0, %eax
; X86-SSE2-NEXT:    testb %al, %al
; X86-SSE2-NEXT:    setnp %al
; X86-SSE2-NEXT:    movl %ebp, %esp
; X86-SSE2-NEXT:    popl %ebp
; X86-SSE2-NEXT:    retl
;
; X64-SSE2-LABEL: trunc_v8i64_v8i1:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
; X64-SSE2-NEXT:    pslld $16, %xmm2
; X64-SSE2-NEXT:    psrad $16, %xmm2
; X64-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X64-SSE2-NEXT:    pslld $16, %xmm0
; X64-SSE2-NEXT:    psrad $16, %xmm0
; X64-SSE2-NEXT:    packssdw %xmm2, %xmm0
; X64-SSE2-NEXT:    psllw $15, %xmm0
; X64-SSE2-NEXT:    packsswb %xmm0, %xmm0
; X64-SSE2-NEXT:    pmovmskb %xmm0, %eax
; X64-SSE2-NEXT:    testb %al, %al
; X64-SSE2-NEXT:    setnp %al
; X64-SSE2-NEXT:    retq
;
; SSE41-LABEL: trunc_v8i64_v8i1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pxor %xmm4, %xmm4
; SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3],xmm3[4],xmm4[5,6,7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1,2,3],xmm2[4],xmm4[5,6,7]
; SSE41-NEXT:    packusdw %xmm3, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1,2,3],xmm1[4],xmm4[5,6,7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1,2,3],xmm0[4],xmm4[5,6,7]
; SSE41-NEXT:    packusdw %xmm1, %xmm0
; SSE41-NEXT:    packusdw %xmm2, %xmm0
; SSE41-NEXT:    psllw $15, %xmm0
; SSE41-NEXT:    packsswb %xmm0, %xmm0
; SSE41-NEXT:    pmovmskb %xmm0, %eax
; SSE41-NEXT:    testb %al, %al
; SSE41-NEXT:    setnp %al
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc_v8i64_v8i1:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
; AVX1-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX1-NEXT:    vpslld $31, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vmovmskps %ymm0, %eax
; AVX1-NEXT:    testb %al, %al
; AVX1-NEXT:    setnp %al
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_v8i64_v8i1:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX2-NEXT:    vmovmskps %ymm0, %eax
; AVX2-NEXT:    testb %al, %al
; AVX2-NEXT:    setnp %al
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: trunc_v8i64_v8i1:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    setnp %al
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: trunc_v8i64_v8i1:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsllq $63, %zmm0, %zmm0
; AVX512BW-NEXT:    vptestmq %zmm0, %zmm0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    testb %al, %al
; AVX512BW-NEXT:    setnp %al
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512VL-LABEL: trunc_v8i64_v8i1:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsllq $63, %zmm0, %zmm0
; AVX512VL-NEXT:    vptestmq %zmm0, %zmm0, %k0
; AVX512VL-NEXT:    kmovd %k0, %eax
; AVX512VL-NEXT:    testb %al, %al
; AVX512VL-NEXT:    setnp %al
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
  %a = trunc <8 x i64> %0 to <8 x i1>
  %b = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a)
  ret i1 %b
}

; Truncates each lane of a <16 x i32> to i1 and XOR-reduces the 16 bits.
define i1 @trunc_v16i32_v16i1(<16 x i32>) nounwind {
; X86-SSE2-LABEL: trunc_v16i32_v16i1:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %ebp
; X86-SSE2-NEXT:    movl %esp, %ebp
; X86-SSE2-NEXT:    andl $-16, %esp
; X86-SSE2-NEXT:    subl $16, %esp
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; X86-SSE2-NEXT:    pand %xmm3, %xmm1
; X86-SSE2-NEXT:    pand %xmm3, %xmm0
; X86-SSE2-NEXT:    packuswb %xmm1, %xmm0
; X86-SSE2-NEXT:    pand %xmm3, %xmm2
; X86-SSE2-NEXT:    pand 8(%ebp), %xmm3
; X86-SSE2-NEXT:    packuswb %xmm3, %xmm2
; X86-SSE2-NEXT:    packuswb %xmm2, %xmm0
; X86-SSE2-NEXT:    psllw $7, %xmm0
; X86-SSE2-NEXT:    pmovmskb %xmm0, %eax
; X86-SSE2-NEXT:    xorb %ah, %al
; X86-SSE2-NEXT:    setnp %al
; X86-SSE2-NEXT:    movl %ebp, %esp
; X86-SSE2-NEXT:    popl %ebp
; X86-SSE2-NEXT:    retl
;
; X64-SSE2-LABEL: trunc_v16i32_v16i1:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; X64-SSE2-NEXT:    pand %xmm4, %xmm3
; X64-SSE2-NEXT:    pand %xmm4, %xmm2
; X64-SSE2-NEXT:    packuswb %xmm3, %xmm2
; X64-SSE2-NEXT:    pand %xmm4, %xmm1
; X64-SSE2-NEXT:    pand %xmm4, %xmm0
; X64-SSE2-NEXT:    packuswb %xmm1, %xmm0
; X64-SSE2-NEXT:    packuswb %xmm2, %xmm0
; X64-SSE2-NEXT:    psllw $7, %xmm0
; X64-SSE2-NEXT:    pmovmskb %xmm0, %eax
; X64-SSE2-NEXT:    xorb %ah, %al
; X64-SSE2-NEXT:    setnp %al
; X64-SSE2-NEXT:    retq
;
; SSE41-LABEL: trunc_v16i32_v16i1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm4 = [255,255,255,255]
; SSE41-NEXT:    pand %xmm4, %xmm3
; SSE41-NEXT:    pand %xmm4, %xmm2
; SSE41-NEXT:    packusdw %xmm3, %xmm2
; SSE41-NEXT:    pand %xmm4, %xmm1
; SSE41-NEXT:    pand %xmm4, %xmm0
; SSE41-NEXT:    packusdw %xmm1, %xmm0
; SSE41-NEXT:    packuswb %xmm2, %xmm0
; SSE41-NEXT:    psllw $7, %xmm0
; SSE41-NEXT:    pmovmskb %xmm0, %eax
; SSE41-NEXT:    xorb %ah, %al
; SSE41-NEXT:    setnp %al
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc_v16i32_v16i1:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vbroadcastss {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpackusdw %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsllw $7, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    xorb %ah, %al
; AVX1-NEXT:    setnp %al
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_v16i32_v16i1:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastd {{.*#+}} ymm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX2-NEXT:    vpsllw $7, %xmm0, %xmm0
; AVX2-NEXT:    vpmovmskb %xmm0, %eax
; AVX2-NEXT:    xorb %ah, %al
; AVX2-NEXT:    setnp %al
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: trunc_v16i32_v16i1:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, %ecx
; AVX512F-NEXT:    shrl $8, %ecx
; AVX512F-NEXT:    xorb %al, %cl
; AVX512F-NEXT:    setnp %al
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: trunc_v16i32_v16i1:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpslld $31, %zmm0, %zmm0
; AVX512BW-NEXT:    vptestmd %zmm0, %zmm0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    movl %eax, %ecx
; AVX512BW-NEXT:    shrl $8, %ecx
; AVX512BW-NEXT:    xorb %al, %cl
; AVX512BW-NEXT:    setnp %al
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512VL-LABEL: trunc_v16i32_v16i1:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpslld $31, %zmm0, %zmm0
; AVX512VL-NEXT:    vptestmd %zmm0, %zmm0, %k0
; AVX512VL-NEXT:    kmovd %k0, %eax
; AVX512VL-NEXT:    movl %eax, %ecx
; AVX512VL-NEXT:    shrl $8, %ecx
; AVX512VL-NEXT:    xorb %al, %cl
; AVX512VL-NEXT:    setnp %al
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
  %a = trunc <16 x i32> %0 to <16 x i1>
  %b = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %a)
  ret i1 %b
}

; Truncates each lane of a <32 x i16> to i1 and XOR-reduces the 32 bits.
define i1 @trunc_v32i16_v32i1(<32 x i16>) nounwind {
; X86-SSE2-LABEL: trunc_v32i16_v32i1:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %ebp
; X86-SSE2-NEXT:    movl %esp, %ebp
; X86-SSE2-NEXT:    andl $-16, %esp
; X86-SSE2-NEXT:    subl $16, %esp
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
; X86-SSE2-NEXT:    pand %xmm3, %xmm1
; X86-SSE2-NEXT:    pand %xmm3, %xmm0
; X86-SSE2-NEXT:    packuswb %xmm1, %xmm0
; X86-SSE2-NEXT:    pand %xmm3, %xmm2
; X86-SSE2-NEXT:    pand 8(%ebp), %xmm3
; X86-SSE2-NEXT:    packuswb %xmm3, %xmm2
; X86-SSE2-NEXT:    pxor %xmm0, %xmm2
; X86-SSE2-NEXT:    psllw $7, %xmm2
; X86-SSE2-NEXT:    pmovmskb %xmm2, %eax
; X86-SSE2-NEXT:    xorb %ah, %al
; X86-SSE2-NEXT:    setnp %al
; X86-SSE2-NEXT:    movl %ebp, %esp
; X86-SSE2-NEXT:    popl %ebp
; X86-SSE2-NEXT:    retl
;
; X64-SSE2-LABEL: trunc_v32i16_v32i1:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
; X64-SSE2-NEXT:    pand %xmm4, %xmm3
; X64-SSE2-NEXT:    pand %xmm4, %xmm2
; X64-SSE2-NEXT:    packuswb %xmm3, %xmm2
; X64-SSE2-NEXT:    pand %xmm4, %xmm1
; X64-SSE2-NEXT:    pand %xmm4, %xmm0
; X64-SSE2-NEXT:    packuswb %xmm1, %xmm0
; X64-SSE2-NEXT:    pxor %xmm2, %xmm0
; X64-SSE2-NEXT:    psllw $7, %xmm0
; X64-SSE2-NEXT:    pmovmskb %xmm0, %eax
; X64-SSE2-NEXT:    xorb %ah, %al
; X64-SSE2-NEXT:    setnp %al
; X64-SSE2-NEXT:    retq
;
; SSE41-LABEL: trunc_v32i16_v32i1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
; SSE41-NEXT:    pand %xmm4, %xmm3
; SSE41-NEXT:    pand %xmm4, %xmm2
; SSE41-NEXT:    packuswb %xmm3, %xmm2
; SSE41-NEXT:    pand %xmm4, %xmm1
; SSE41-NEXT:    pand %xmm4, %xmm0
; SSE41-NEXT:    packuswb %xmm1, %xmm0
; SSE41-NEXT:    pxor %xmm2, %xmm0
; SSE41-NEXT:    psllw $7, %xmm0
; SSE41-NEXT:    pmovmskb %xmm0, %eax
; SSE41-NEXT:    xorb %ah, %al
; SSE41-NEXT:    setnp %al
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc_v32i16_v32i1:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsllw $7, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    xorb %ah, %al
; AVX1-NEXT:    setnp %al
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_v32i16_v32i1:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastw {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    vpsllw $7, %ymm0, %ymm0
; AVX2-NEXT:    vpmovmskb %ymm0, %eax
; AVX2-NEXT:    movl %eax, %ecx
; AVX2-NEXT:    shrl $16, %ecx
; AVX2-NEXT:    xorl %eax, %ecx
; AVX2-NEXT:    xorb %ch, %cl
; AVX2-NEXT:    setnp %al
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: trunc_v32i16_v32i1:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512F-NEXT:    vpxor %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT:    kshiftrw $8, %k0, %k1
; AVX512F-NEXT:    kxorw %k1, %k0, %k0
; AVX512F-NEXT:    kshiftrw $4, %k0, %k1
; AVX512F-NEXT:    kxorw %k1, %k0, %k0
; AVX512F-NEXT:    kshiftrw $2, %k0, %k1
; AVX512F-NEXT:    kxorw %k1, %k0, %k0
; AVX512F-NEXT:    kshiftrw $1, %k0, %k1
; AVX512F-NEXT:    kxorw %k1, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: trunc_v32i16_v32i1:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsllw $15, %zmm0, %zmm0
; AVX512BW-NEXT:    vpmovw2m %zmm0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    movl %eax, %ecx
; AVX512BW-NEXT:    shrl $16, %ecx
; AVX512BW-NEXT:    xorl %eax, %ecx
; AVX512BW-NEXT:    xorb %ch, %cl
; AVX512BW-NEXT:    setnp %al
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512VL-LABEL: trunc_v32i16_v32i1:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsllw $15, %zmm0, %zmm0
; AVX512VL-NEXT:    vpmovw2m %zmm0, %k0
; AVX512VL-NEXT:    kmovd %k0, %eax
; AVX512VL-NEXT:    movl %eax, %ecx
; AVX512VL-NEXT:    shrl $16, %ecx
; AVX512VL-NEXT:    xorl %eax, %ecx
; AVX512VL-NEXT:    xorb %ch, %cl
; AVX512VL-NEXT:    setnp %al
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
  %a = trunc <32 x i16> %0 to <32 x i1>
  %b = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> %a)
  ret i1 %b
}

; Truncates each lane of a <64 x i8> to i1 and XOR-reduces the 64 bits.
define i1 @trunc_v64i8_v64i1(<64 x i8>) nounwind {
; X86-SSE2-LABEL: trunc_v64i8_v64i1:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %ebp
; X86-SSE2-NEXT:    movl %esp, %ebp
; X86-SSE2-NEXT:    andl $-16, %esp
; X86-SSE2-NEXT:    subl $16, %esp
; X86-SSE2-NEXT:    pxor %xmm2, %xmm0
; X86-SSE2-NEXT:    pxor 8(%ebp), %xmm1
; X86-SSE2-NEXT:    pxor %xmm0, %xmm1
; X86-SSE2-NEXT:    psllw $7, %xmm1
; X86-SSE2-NEXT:    pmovmskb %xmm1, %eax
; X86-SSE2-NEXT:    xorb %ah, %al
; X86-SSE2-NEXT:    setnp %al
; X86-SSE2-NEXT:    movl %ebp, %esp
; X86-SSE2-NEXT:    popl %ebp
; X86-SSE2-NEXT:    retl
;
; X64-SSE-LABEL: trunc_v64i8_v64i1:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    pxor %xmm3, %xmm1
; X64-SSE-NEXT:    pxor %xmm2, %xmm0
; X64-SSE-NEXT:    pxor %xmm1, %xmm0
; X64-SSE-NEXT:    psllw $7, %xmm0
; X64-SSE-NEXT:    pmovmskb %xmm0, %eax
; X64-SSE-NEXT:    xorb %ah, %al
; X64-SSE-NEXT:    setnp %al
; X64-SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_v64i8_v64i1:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vxorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsllw $7, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    xorb %ah, %al
; AVX1-NEXT:    setnp %al
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_v64i8_v64i1:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsllw $7, %ymm0, %ymm0
; AVX2-NEXT:    vpmovmskb %ymm0, %eax
; AVX2-NEXT:    movl %eax, %ecx
; AVX2-NEXT:    shrl $16, %ecx
; AVX2-NEXT:    xorl %eax, %ecx
; AVX2-NEXT:    xorb %ch, %cl
; AVX2-NEXT:    setnp %al
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: trunc_v64i8_v64i1:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm3
; AVX512F-NEXT:    vpxor %xmm2, %xmm3, %xmm2
; AVX512F-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT:    kshiftrw $8, %k0, %k1
; AVX512F-NEXT:    kxorw %k1, %k0, %k0
; AVX512F-NEXT:    kshiftrw $4, %k0, %k1
; AVX512F-NEXT:    kxorw %k1, %k0, %k0
; AVX512F-NEXT:    kshiftrw $2, %k0, %k1
; AVX512F-NEXT:    kxorw %k1, %k0, %k0
; AVX512F-NEXT:    kshiftrw $1, %k0, %k1
; AVX512F-NEXT:    kxorw %k1, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: trunc_v64i8_v64i1:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsllw $7, %zmm0, %zmm0
; AVX512BW-NEXT:    vpmovb2m %zmm0, %k0
; AVX512BW-NEXT:    kmovq %k0, %rax
; AVX512BW-NEXT:    movq %rax, %rcx
; AVX512BW-NEXT:    shrq $32, %rcx
; AVX512BW-NEXT:    xorl %eax, %ecx
; AVX512BW-NEXT:    movl %ecx, %eax
; AVX512BW-NEXT:    shrl $16, %eax
; AVX512BW-NEXT:    xorl %ecx, %eax
; AVX512BW-NEXT:    xorb %ah, %al
; AVX512BW-NEXT:    setnp %al
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512VL-LABEL: trunc_v64i8_v64i1:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsllw $7, %zmm0, %zmm0
; AVX512VL-NEXT:    vpmovb2m %zmm0, %k0
; AVX512VL-NEXT:    kmovq %k0, %rax
; AVX512VL-NEXT:    movq %rax, %rcx
; AVX512VL-NEXT:    shrq $32, %rcx
; AVX512VL-NEXT:    xorl %eax, %ecx
; AVX512VL-NEXT:    movl %ecx, %eax
; AVX512VL-NEXT:    shrl $16, %eax
; AVX512VL-NEXT:    xorl %ecx, %eax
; AVX512VL-NEXT:    xorb %ah, %al
; AVX512VL-NEXT:    setnp %al
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
  %a = trunc <64 x i8> %0 to <64 x i1>
  %b = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> %a)
  ret i1 %b
}

;
; Comparison With Zero
;

; Compares each lane of a <2 x i64> with zero (icmp eq) and XOR-reduces the
; 2 resulting i1 values.
define i1 @icmp0_v2i64_v2i1(<2 x i64>) nounwind {
; SSE2-LABEL: icmp0_v2i64_v2i1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    movmskpd %xmm0, %eax
; SSE2-NEXT:    testb %al, %al
; SSE2-NEXT:    setnp %al
; SSE2-NEXT:    ret{{[l|q]}}
;
; SSE41-LABEL: icmp0_v2i64_v2i1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    pcmpeqq %xmm0, %xmm1
; SSE41-NEXT:    movmskpd %xmm1, %eax
; SSE41-NEXT:    testb %al, %al
; SSE41-NEXT:    setnp %al
; SSE41-NEXT:    retq
;
; AVX-LABEL: icmp0_v2i64_v2i1:
; AVX:       # %bb.0:
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovmskpd %xmm0, %eax
; AVX-NEXT:    testb %al, %al
; AVX-NEXT:    setnp %al
; AVX-NEXT:    retq
;
; AVX512F-LABEL: icmp0_v2i64_v2i1:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vptestnmq %zmm0, %zmm0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb $3, %al
; AVX512F-NEXT:    setnp %al
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: icmp0_v2i64_v2i1:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT:    vptestnmq %zmm0, %zmm0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    testb $3, %al
; AVX512BW-NEXT:    setnp %al
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512VL-LABEL: icmp0_v2i64_v2i1:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vptestnmq %xmm0, %xmm0, %k0
; AVX512VL-NEXT:    kmovd %k0, %eax
; AVX512VL-NEXT:    testb %al, %al
; AVX512VL-NEXT:    setnp %al
; AVX512VL-NEXT:    retq
  %a = icmp eq <2 x i64> %0, zeroinitializer
  %b = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> %a)
  ret i1 %b
}

; Compares each lane of a <4 x i32> with zero (icmp eq) and XOR-reduces the
; 4 resulting i1 values.
define i1 @icmp0_v4i32_v4i1(<4 x i32>) nounwind {
; SSE-LABEL: icmp0_v4i32_v4i1:
; SSE:       # %bb.0:
; SSE-NEXT:    pxor %xmm1, %xmm1
; SSE-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE-NEXT:    movmskps %xmm1, %eax
; SSE-NEXT:    testb %al, %al
; SSE-NEXT:    setnp %al
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: icmp0_v4i32_v4i1:
; AVX:       # %bb.0:
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovmskps %xmm0, %eax
; AVX-NEXT:    testb %al, %al
; AVX-NEXT:    setnp %al
; AVX-NEXT:    retq
;
; AVX512F-LABEL: icmp0_v4i32_v4i1:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb $15, %al
; AVX512F-NEXT:    setnp %al
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: icmp0_v4i32_v4i1:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT:    vptestnmd %zmm0, %zmm0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    testb $15, %al
; AVX512BW-NEXT:    setnp %al
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512VL-LABEL: icmp0_v4i32_v4i1:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vptestnmd %xmm0, %xmm0, %k0
; AVX512VL-NEXT:    kmovd %k0, %eax
; AVX512VL-NEXT:    testb %al, %al
; AVX512VL-NEXT:    setnp %al
; AVX512VL-NEXT:    retq
  %a = icmp eq <4 x i32> %0, zeroinitializer
  %b = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %a)
  ret i1 %b
}

define i1 @icmp0_v8i16_v8i1(<8 x i16>) nounwind {
; SSE-LABEL: icmp0_v8i16_v8i1:
; SSE:       # %bb.0:
; SSE-NEXT:    pxor %xmm1, %xmm1
; SSE-NEXT:    pcmpeqw %xmm0, %xmm1
; SSE-NEXT:    packsswb %xmm1,
%xmm1 1132; SSE-NEXT: pmovmskb %xmm1, %eax 1133; SSE-NEXT: testb %al, %al 1134; SSE-NEXT: setnp %al 1135; SSE-NEXT: ret{{[l|q]}} 1136; 1137; AVX-LABEL: icmp0_v8i16_v8i1: 1138; AVX: # %bb.0: 1139; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 1140; AVX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 1141; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 1142; AVX-NEXT: vpmovmskb %xmm0, %eax 1143; AVX-NEXT: testb %al, %al 1144; AVX-NEXT: setnp %al 1145; AVX-NEXT: retq 1146; 1147; AVX512F-LABEL: icmp0_v8i16_v8i1: 1148; AVX512F: # %bb.0: 1149; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 1150; AVX512F-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 1151; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0 1152; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0 1153; AVX512F-NEXT: kmovw %k0, %eax 1154; AVX512F-NEXT: testb %al, %al 1155; AVX512F-NEXT: setnp %al 1156; AVX512F-NEXT: vzeroupper 1157; AVX512F-NEXT: retq 1158; 1159; AVX512BW-LABEL: icmp0_v8i16_v8i1: 1160; AVX512BW: # %bb.0: 1161; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1162; AVX512BW-NEXT: vptestnmw %zmm0, %zmm0, %k0 1163; AVX512BW-NEXT: kmovd %k0, %eax 1164; AVX512BW-NEXT: testb %al, %al 1165; AVX512BW-NEXT: setnp %al 1166; AVX512BW-NEXT: vzeroupper 1167; AVX512BW-NEXT: retq 1168; 1169; AVX512VL-LABEL: icmp0_v8i16_v8i1: 1170; AVX512VL: # %bb.0: 1171; AVX512VL-NEXT: vptestnmw %xmm0, %xmm0, %k0 1172; AVX512VL-NEXT: kmovd %k0, %eax 1173; AVX512VL-NEXT: testb %al, %al 1174; AVX512VL-NEXT: setnp %al 1175; AVX512VL-NEXT: retq 1176 %a = icmp eq <8 x i16> %0, zeroinitializer 1177 %b = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a) 1178 ret i1 %b 1179} 1180 1181define i1 @icmp0_v16i8_v16i1(<16 x i8>) nounwind { 1182; SSE-LABEL: icmp0_v16i8_v16i1: 1183; SSE: # %bb.0: 1184; SSE-NEXT: pxor %xmm1, %xmm1 1185; SSE-NEXT: pcmpeqb %xmm0, %xmm1 1186; SSE-NEXT: pmovmskb %xmm1, %eax 1187; SSE-NEXT: xorb %ah, %al 1188; SSE-NEXT: setnp %al 1189; SSE-NEXT: ret{{[l|q]}} 1190; 1191; AVX-LABEL: icmp0_v16i8_v16i1: 1192; AVX: # %bb.0: 1193; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 1194; 
AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 1195; AVX-NEXT: vpmovmskb %xmm0, %eax 1196; AVX-NEXT: xorb %ah, %al 1197; AVX-NEXT: setnp %al 1198; AVX-NEXT: retq 1199; 1200; AVX512F-LABEL: icmp0_v16i8_v16i1: 1201; AVX512F: # %bb.0: 1202; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 1203; AVX512F-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 1204; AVX512F-NEXT: vpmovmskb %xmm0, %eax 1205; AVX512F-NEXT: xorb %ah, %al 1206; AVX512F-NEXT: setnp %al 1207; AVX512F-NEXT: retq 1208; 1209; AVX512BW-LABEL: icmp0_v16i8_v16i1: 1210; AVX512BW: # %bb.0: 1211; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1212; AVX512BW-NEXT: vptestnmb %zmm0, %zmm0, %k0 1213; AVX512BW-NEXT: kmovd %k0, %eax 1214; AVX512BW-NEXT: movl %eax, %ecx 1215; AVX512BW-NEXT: shrl $8, %ecx 1216; AVX512BW-NEXT: xorb %al, %cl 1217; AVX512BW-NEXT: setnp %al 1218; AVX512BW-NEXT: vzeroupper 1219; AVX512BW-NEXT: retq 1220; 1221; AVX512VL-LABEL: icmp0_v16i8_v16i1: 1222; AVX512VL: # %bb.0: 1223; AVX512VL-NEXT: vptestnmb %xmm0, %xmm0, %k0 1224; AVX512VL-NEXT: kmovd %k0, %eax 1225; AVX512VL-NEXT: movl %eax, %ecx 1226; AVX512VL-NEXT: shrl $8, %ecx 1227; AVX512VL-NEXT: xorb %al, %cl 1228; AVX512VL-NEXT: setnp %al 1229; AVX512VL-NEXT: retq 1230 %a = icmp eq <16 x i8> %0, zeroinitializer 1231 %b = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %a) 1232 ret i1 %b 1233} 1234 1235define i1 @icmp0_v4i64_v4i1(<4 x i64>) nounwind { 1236; SSE2-LABEL: icmp0_v4i64_v4i1: 1237; SSE2: # %bb.0: 1238; SSE2-NEXT: pxor %xmm2, %xmm2 1239; SSE2-NEXT: pcmpeqd %xmm2, %xmm1 1240; SSE2-NEXT: pcmpeqd %xmm2, %xmm0 1241; SSE2-NEXT: movdqa %xmm0, %xmm2 1242; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,3],xmm1[1,3] 1243; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] 1244; SSE2-NEXT: andps %xmm2, %xmm0 1245; SSE2-NEXT: movmskps %xmm0, %eax 1246; SSE2-NEXT: testb %al, %al 1247; SSE2-NEXT: setnp %al 1248; SSE2-NEXT: ret{{[l|q]}} 1249; 1250; SSE41-LABEL: icmp0_v4i64_v4i1: 1251; SSE41: # %bb.0: 1252; SSE41-NEXT: pxor %xmm2, %xmm2 1253; SSE41-NEXT: pcmpeqq %xmm2, 
%xmm1 1254; SSE41-NEXT: pcmpeqq %xmm2, %xmm0 1255; SSE41-NEXT: packssdw %xmm1, %xmm0 1256; SSE41-NEXT: movmskps %xmm0, %eax 1257; SSE41-NEXT: testb %al, %al 1258; SSE41-NEXT: setnp %al 1259; SSE41-NEXT: retq 1260; 1261; AVX1-LABEL: icmp0_v4i64_v4i1: 1262; AVX1: # %bb.0: 1263; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1264; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 1265; AVX1-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1 1266; AVX1-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0 1267; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1268; AVX1-NEXT: vmovmskpd %ymm0, %eax 1269; AVX1-NEXT: testb %al, %al 1270; AVX1-NEXT: setnp %al 1271; AVX1-NEXT: vzeroupper 1272; AVX1-NEXT: retq 1273; 1274; AVX2-LABEL: icmp0_v4i64_v4i1: 1275; AVX2: # %bb.0: 1276; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 1277; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 1278; AVX2-NEXT: vmovmskpd %ymm0, %eax 1279; AVX2-NEXT: testb %al, %al 1280; AVX2-NEXT: setnp %al 1281; AVX2-NEXT: vzeroupper 1282; AVX2-NEXT: retq 1283; 1284; AVX512F-LABEL: icmp0_v4i64_v4i1: 1285; AVX512F: # %bb.0: 1286; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1287; AVX512F-NEXT: vptestnmq %zmm0, %zmm0, %k0 1288; AVX512F-NEXT: kmovw %k0, %eax 1289; AVX512F-NEXT: testb $15, %al 1290; AVX512F-NEXT: setnp %al 1291; AVX512F-NEXT: vzeroupper 1292; AVX512F-NEXT: retq 1293; 1294; AVX512BW-LABEL: icmp0_v4i64_v4i1: 1295; AVX512BW: # %bb.0: 1296; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1297; AVX512BW-NEXT: vptestnmq %zmm0, %zmm0, %k0 1298; AVX512BW-NEXT: kmovd %k0, %eax 1299; AVX512BW-NEXT: testb $15, %al 1300; AVX512BW-NEXT: setnp %al 1301; AVX512BW-NEXT: vzeroupper 1302; AVX512BW-NEXT: retq 1303; 1304; AVX512VL-LABEL: icmp0_v4i64_v4i1: 1305; AVX512VL: # %bb.0: 1306; AVX512VL-NEXT: vptestnmq %ymm0, %ymm0, %k0 1307; AVX512VL-NEXT: kmovd %k0, %eax 1308; AVX512VL-NEXT: testb %al, %al 1309; AVX512VL-NEXT: setnp %al 1310; AVX512VL-NEXT: vzeroupper 1311; AVX512VL-NEXT: retq 1312 %a = icmp eq <4 x i64> %0, zeroinitializer 1313 %b = call i1 
@llvm.vector.reduce.xor.v4i1(<4 x i1> %a) 1314 ret i1 %b 1315} 1316 1317define i1 @icmp0_v8i32_v8i1(<8 x i32>) nounwind { 1318; SSE-LABEL: icmp0_v8i32_v8i1: 1319; SSE: # %bb.0: 1320; SSE-NEXT: pxor %xmm2, %xmm2 1321; SSE-NEXT: pcmpeqd %xmm2, %xmm1 1322; SSE-NEXT: pcmpeqd %xmm2, %xmm0 1323; SSE-NEXT: packssdw %xmm1, %xmm0 1324; SSE-NEXT: packsswb %xmm0, %xmm0 1325; SSE-NEXT: pmovmskb %xmm0, %eax 1326; SSE-NEXT: testb %al, %al 1327; SSE-NEXT: setnp %al 1328; SSE-NEXT: ret{{[l|q]}} 1329; 1330; AVX1-LABEL: icmp0_v8i32_v8i1: 1331; AVX1: # %bb.0: 1332; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1333; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 1334; AVX1-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1 1335; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 1336; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1337; AVX1-NEXT: vmovmskps %ymm0, %eax 1338; AVX1-NEXT: testb %al, %al 1339; AVX1-NEXT: setnp %al 1340; AVX1-NEXT: vzeroupper 1341; AVX1-NEXT: retq 1342; 1343; AVX2-LABEL: icmp0_v8i32_v8i1: 1344; AVX2: # %bb.0: 1345; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 1346; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 1347; AVX2-NEXT: vmovmskps %ymm0, %eax 1348; AVX2-NEXT: testb %al, %al 1349; AVX2-NEXT: setnp %al 1350; AVX2-NEXT: vzeroupper 1351; AVX2-NEXT: retq 1352; 1353; AVX512F-LABEL: icmp0_v8i32_v8i1: 1354; AVX512F: # %bb.0: 1355; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1356; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k0 1357; AVX512F-NEXT: kmovw %k0, %eax 1358; AVX512F-NEXT: testb %al, %al 1359; AVX512F-NEXT: setnp %al 1360; AVX512F-NEXT: vzeroupper 1361; AVX512F-NEXT: retq 1362; 1363; AVX512BW-LABEL: icmp0_v8i32_v8i1: 1364; AVX512BW: # %bb.0: 1365; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1366; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0 1367; AVX512BW-NEXT: kmovd %k0, %eax 1368; AVX512BW-NEXT: testb %al, %al 1369; AVX512BW-NEXT: setnp %al 1370; AVX512BW-NEXT: vzeroupper 1371; AVX512BW-NEXT: retq 1372; 1373; AVX512VL-LABEL: icmp0_v8i32_v8i1: 1374; AVX512VL: # %bb.0: 1375; 
AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k0 1376; AVX512VL-NEXT: kmovd %k0, %eax 1377; AVX512VL-NEXT: testb %al, %al 1378; AVX512VL-NEXT: setnp %al 1379; AVX512VL-NEXT: vzeroupper 1380; AVX512VL-NEXT: retq 1381 %a = icmp eq <8 x i32> %0, zeroinitializer 1382 %b = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a) 1383 ret i1 %b 1384} 1385 1386define i1 @icmp0_v16i16_v16i1(<16 x i16>) nounwind { 1387; SSE-LABEL: icmp0_v16i16_v16i1: 1388; SSE: # %bb.0: 1389; SSE-NEXT: pxor %xmm2, %xmm2 1390; SSE-NEXT: pcmpeqw %xmm2, %xmm1 1391; SSE-NEXT: pcmpeqw %xmm2, %xmm0 1392; SSE-NEXT: packsswb %xmm1, %xmm0 1393; SSE-NEXT: pmovmskb %xmm0, %eax 1394; SSE-NEXT: xorb %ah, %al 1395; SSE-NEXT: setnp %al 1396; SSE-NEXT: ret{{[l|q]}} 1397; 1398; AVX1-LABEL: icmp0_v16i16_v16i1: 1399; AVX1: # %bb.0: 1400; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1401; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 1402; AVX1-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1 1403; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0 1404; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 1405; AVX1-NEXT: vpmovmskb %xmm0, %eax 1406; AVX1-NEXT: xorb %ah, %al 1407; AVX1-NEXT: setnp %al 1408; AVX1-NEXT: vzeroupper 1409; AVX1-NEXT: retq 1410; 1411; AVX2-LABEL: icmp0_v16i16_v16i1: 1412; AVX2: # %bb.0: 1413; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 1414; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 1415; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1416; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 1417; AVX2-NEXT: vpmovmskb %xmm0, %eax 1418; AVX2-NEXT: xorb %ah, %al 1419; AVX2-NEXT: setnp %al 1420; AVX2-NEXT: vzeroupper 1421; AVX2-NEXT: retq 1422; 1423; AVX512F-LABEL: icmp0_v16i16_v16i1: 1424; AVX512F: # %bb.0: 1425; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 1426; AVX512F-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 1427; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0 1428; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 1429; AVX512F-NEXT: kmovw %k0, %eax 1430; AVX512F-NEXT: movl %eax, %ecx 1431; AVX512F-NEXT: shrl $8, %ecx 1432; AVX512F-NEXT: xorb %al, %cl 1433; AVX512F-NEXT: setnp %al 1434; 
AVX512F-NEXT: vzeroupper 1435; AVX512F-NEXT: retq 1436; 1437; AVX512BW-LABEL: icmp0_v16i16_v16i1: 1438; AVX512BW: # %bb.0: 1439; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1440; AVX512BW-NEXT: vptestnmw %zmm0, %zmm0, %k0 1441; AVX512BW-NEXT: kmovd %k0, %eax 1442; AVX512BW-NEXT: movl %eax, %ecx 1443; AVX512BW-NEXT: shrl $8, %ecx 1444; AVX512BW-NEXT: xorb %al, %cl 1445; AVX512BW-NEXT: setnp %al 1446; AVX512BW-NEXT: vzeroupper 1447; AVX512BW-NEXT: retq 1448; 1449; AVX512VL-LABEL: icmp0_v16i16_v16i1: 1450; AVX512VL: # %bb.0: 1451; AVX512VL-NEXT: vptestnmw %ymm0, %ymm0, %k0 1452; AVX512VL-NEXT: kmovd %k0, %eax 1453; AVX512VL-NEXT: movl %eax, %ecx 1454; AVX512VL-NEXT: shrl $8, %ecx 1455; AVX512VL-NEXT: xorb %al, %cl 1456; AVX512VL-NEXT: setnp %al 1457; AVX512VL-NEXT: vzeroupper 1458; AVX512VL-NEXT: retq 1459 %a = icmp eq <16 x i16> %0, zeroinitializer 1460 %b = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %a) 1461 ret i1 %b 1462} 1463 1464define i1 @icmp0_v32i8_v32i1(<32 x i8>) nounwind { 1465; SSE-LABEL: icmp0_v32i8_v32i1: 1466; SSE: # %bb.0: 1467; SSE-NEXT: pxor %xmm2, %xmm2 1468; SSE-NEXT: pcmpeqb %xmm2, %xmm1 1469; SSE-NEXT: pcmpeqb %xmm2, %xmm0 1470; SSE-NEXT: pxor %xmm1, %xmm0 1471; SSE-NEXT: pmovmskb %xmm0, %eax 1472; SSE-NEXT: xorb %ah, %al 1473; SSE-NEXT: setnp %al 1474; SSE-NEXT: ret{{[l|q]}} 1475; 1476; AVX1-LABEL: icmp0_v32i8_v32i1: 1477; AVX1: # %bb.0: 1478; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1479; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 1480; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1 1481; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0 1482; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 1483; AVX1-NEXT: vpmovmskb %xmm0, %eax 1484; AVX1-NEXT: xorb %ah, %al 1485; AVX1-NEXT: setnp %al 1486; AVX1-NEXT: vzeroupper 1487; AVX1-NEXT: retq 1488; 1489; AVX2-LABEL: icmp0_v32i8_v32i1: 1490; AVX2: # %bb.0: 1491; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 1492; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 1493; AVX2-NEXT: vpmovmskb %ymm0, %eax 1494; AVX2-NEXT: movl %eax, %ecx 
1495; AVX2-NEXT: shrl $16, %ecx 1496; AVX2-NEXT: xorl %eax, %ecx 1497; AVX2-NEXT: xorb %ch, %cl 1498; AVX2-NEXT: setnp %al 1499; AVX2-NEXT: vzeroupper 1500; AVX2-NEXT: retq 1501; 1502; AVX512F-LABEL: icmp0_v32i8_v32i1: 1503; AVX512F: # %bb.0: 1504; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 1505; AVX512F-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 1506; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 1507; AVX512F-NEXT: vpxor %xmm1, %xmm0, %xmm0 1508; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 1509; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 1510; AVX512F-NEXT: kshiftrw $8, %k0, %k1 1511; AVX512F-NEXT: kxorw %k1, %k0, %k0 1512; AVX512F-NEXT: kshiftrw $4, %k0, %k1 1513; AVX512F-NEXT: kxorw %k1, %k0, %k0 1514; AVX512F-NEXT: kshiftrw $2, %k0, %k1 1515; AVX512F-NEXT: kxorw %k1, %k0, %k0 1516; AVX512F-NEXT: kshiftrw $1, %k0, %k1 1517; AVX512F-NEXT: kxorw %k1, %k0, %k0 1518; AVX512F-NEXT: kmovw %k0, %eax 1519; AVX512F-NEXT: # kill: def $al killed $al killed $eax 1520; AVX512F-NEXT: vzeroupper 1521; AVX512F-NEXT: retq 1522; 1523; AVX512BW-LABEL: icmp0_v32i8_v32i1: 1524; AVX512BW: # %bb.0: 1525; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1526; AVX512BW-NEXT: vptestnmb %zmm0, %zmm0, %k0 1527; AVX512BW-NEXT: kmovd %k0, %eax 1528; AVX512BW-NEXT: movl %eax, %ecx 1529; AVX512BW-NEXT: shrl $16, %ecx 1530; AVX512BW-NEXT: xorl %eax, %ecx 1531; AVX512BW-NEXT: xorb %ch, %cl 1532; AVX512BW-NEXT: setnp %al 1533; AVX512BW-NEXT: vzeroupper 1534; AVX512BW-NEXT: retq 1535; 1536; AVX512VL-LABEL: icmp0_v32i8_v32i1: 1537; AVX512VL: # %bb.0: 1538; AVX512VL-NEXT: vptestnmb %ymm0, %ymm0, %k0 1539; AVX512VL-NEXT: kmovd %k0, %eax 1540; AVX512VL-NEXT: movl %eax, %ecx 1541; AVX512VL-NEXT: shrl $16, %ecx 1542; AVX512VL-NEXT: xorl %eax, %ecx 1543; AVX512VL-NEXT: xorb %ch, %cl 1544; AVX512VL-NEXT: setnp %al 1545; AVX512VL-NEXT: vzeroupper 1546; AVX512VL-NEXT: retq 1547 %a = icmp eq <32 x i8> %0, zeroinitializer 1548 %b = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> %a) 1549 ret i1 %b 1550} 1551 1552define i1 
@icmp0_v8i64_v8i1(<8 x i64>) nounwind { 1553; X86-SSE2-LABEL: icmp0_v8i64_v8i1: 1554; X86-SSE2: # %bb.0: 1555; X86-SSE2-NEXT: pushl %ebp 1556; X86-SSE2-NEXT: movl %esp, %ebp 1557; X86-SSE2-NEXT: andl $-16, %esp 1558; X86-SSE2-NEXT: subl $16, %esp 1559; X86-SSE2-NEXT: pxor %xmm3, %xmm3 1560; X86-SSE2-NEXT: pcmpeqd %xmm3, %xmm1 1561; X86-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm1[1,0,3,2] 1562; X86-SSE2-NEXT: pand %xmm1, %xmm4 1563; X86-SSE2-NEXT: pcmpeqd %xmm3, %xmm0 1564; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] 1565; X86-SSE2-NEXT: pand %xmm0, %xmm1 1566; X86-SSE2-NEXT: packssdw %xmm4, %xmm1 1567; X86-SSE2-NEXT: pcmpeqd %xmm3, %xmm2 1568; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,0,3,2] 1569; X86-SSE2-NEXT: pand %xmm2, %xmm0 1570; X86-SSE2-NEXT: pcmpeqd 8(%ebp), %xmm3 1571; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,0,3,2] 1572; X86-SSE2-NEXT: pand %xmm3, %xmm2 1573; X86-SSE2-NEXT: packssdw %xmm2, %xmm0 1574; X86-SSE2-NEXT: packssdw %xmm0, %xmm1 1575; X86-SSE2-NEXT: packsswb %xmm1, %xmm1 1576; X86-SSE2-NEXT: pmovmskb %xmm1, %eax 1577; X86-SSE2-NEXT: testb %al, %al 1578; X86-SSE2-NEXT: setnp %al 1579; X86-SSE2-NEXT: movl %ebp, %esp 1580; X86-SSE2-NEXT: popl %ebp 1581; X86-SSE2-NEXT: retl 1582; 1583; X64-SSE2-LABEL: icmp0_v8i64_v8i1: 1584; X64-SSE2: # %bb.0: 1585; X64-SSE2-NEXT: pxor %xmm4, %xmm4 1586; X64-SSE2-NEXT: pcmpeqd %xmm4, %xmm3 1587; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[1,0,3,2] 1588; X64-SSE2-NEXT: pand %xmm3, %xmm5 1589; X64-SSE2-NEXT: pcmpeqd %xmm4, %xmm2 1590; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,0,3,2] 1591; X64-SSE2-NEXT: pand %xmm2, %xmm3 1592; X64-SSE2-NEXT: packssdw %xmm5, %xmm3 1593; X64-SSE2-NEXT: pcmpeqd %xmm4, %xmm1 1594; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2] 1595; X64-SSE2-NEXT: pand %xmm1, %xmm2 1596; X64-SSE2-NEXT: pcmpeqd %xmm4, %xmm0 1597; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] 1598; X64-SSE2-NEXT: pand %xmm0, %xmm1 1599; X64-SSE2-NEXT: packssdw %xmm2, %xmm1 1600; X64-SSE2-NEXT: 
packssdw %xmm3, %xmm1 1601; X64-SSE2-NEXT: packsswb %xmm1, %xmm1 1602; X64-SSE2-NEXT: pmovmskb %xmm1, %eax 1603; X64-SSE2-NEXT: testb %al, %al 1604; X64-SSE2-NEXT: setnp %al 1605; X64-SSE2-NEXT: retq 1606; 1607; SSE41-LABEL: icmp0_v8i64_v8i1: 1608; SSE41: # %bb.0: 1609; SSE41-NEXT: pxor %xmm4, %xmm4 1610; SSE41-NEXT: pcmpeqq %xmm4, %xmm3 1611; SSE41-NEXT: pcmpeqq %xmm4, %xmm2 1612; SSE41-NEXT: packssdw %xmm3, %xmm2 1613; SSE41-NEXT: pcmpeqq %xmm4, %xmm1 1614; SSE41-NEXT: pcmpeqq %xmm4, %xmm0 1615; SSE41-NEXT: packssdw %xmm1, %xmm0 1616; SSE41-NEXT: packssdw %xmm2, %xmm0 1617; SSE41-NEXT: packsswb %xmm0, %xmm0 1618; SSE41-NEXT: pmovmskb %xmm0, %eax 1619; SSE41-NEXT: testb %al, %al 1620; SSE41-NEXT: setnp %al 1621; SSE41-NEXT: retq 1622; 1623; AVX1-LABEL: icmp0_v8i64_v8i1: 1624; AVX1: # %bb.0: 1625; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1626; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 1627; AVX1-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2 1628; AVX1-NEXT: vpcmpeqq %xmm3, %xmm1, %xmm1 1629; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 1630; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1631; AVX1-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2 1632; AVX1-NEXT: vpcmpeqq %xmm3, %xmm0, %xmm0 1633; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 1634; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1635; AVX1-NEXT: vmovmskps %ymm0, %eax 1636; AVX1-NEXT: testb %al, %al 1637; AVX1-NEXT: setnp %al 1638; AVX1-NEXT: vzeroupper 1639; AVX1-NEXT: retq 1640; 1641; AVX2-LABEL: icmp0_v8i64_v8i1: 1642; AVX2: # %bb.0: 1643; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 1644; AVX2-NEXT: vpcmpeqq %ymm2, %ymm1, %ymm1 1645; AVX2-NEXT: vpcmpeqq %ymm2, %ymm0, %ymm0 1646; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 1647; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 1648; AVX2-NEXT: vmovmskps %ymm0, %eax 1649; AVX2-NEXT: testb %al, %al 1650; AVX2-NEXT: setnp %al 1651; AVX2-NEXT: vzeroupper 1652; AVX2-NEXT: retq 1653; 1654; AVX512F-LABEL: icmp0_v8i64_v8i1: 1655; AVX512F: # %bb.0: 1656; AVX512F-NEXT: vptestnmq %zmm0, %zmm0, %k0 1657; 
AVX512F-NEXT: kmovw %k0, %eax 1658; AVX512F-NEXT: testb %al, %al 1659; AVX512F-NEXT: setnp %al 1660; AVX512F-NEXT: vzeroupper 1661; AVX512F-NEXT: retq 1662; 1663; AVX512BW-LABEL: icmp0_v8i64_v8i1: 1664; AVX512BW: # %bb.0: 1665; AVX512BW-NEXT: vptestnmq %zmm0, %zmm0, %k0 1666; AVX512BW-NEXT: kmovd %k0, %eax 1667; AVX512BW-NEXT: testb %al, %al 1668; AVX512BW-NEXT: setnp %al 1669; AVX512BW-NEXT: vzeroupper 1670; AVX512BW-NEXT: retq 1671; 1672; AVX512VL-LABEL: icmp0_v8i64_v8i1: 1673; AVX512VL: # %bb.0: 1674; AVX512VL-NEXT: vptestnmq %zmm0, %zmm0, %k0 1675; AVX512VL-NEXT: kmovd %k0, %eax 1676; AVX512VL-NEXT: testb %al, %al 1677; AVX512VL-NEXT: setnp %al 1678; AVX512VL-NEXT: vzeroupper 1679; AVX512VL-NEXT: retq 1680 %a = icmp eq <8 x i64> %0, zeroinitializer 1681 %b = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a) 1682 ret i1 %b 1683} 1684 1685define i1 @icmp0_v16i32_v16i1(<16 x i32>) nounwind { 1686; X86-SSE2-LABEL: icmp0_v16i32_v16i1: 1687; X86-SSE2: # %bb.0: 1688; X86-SSE2-NEXT: pushl %ebp 1689; X86-SSE2-NEXT: movl %esp, %ebp 1690; X86-SSE2-NEXT: andl $-16, %esp 1691; X86-SSE2-NEXT: subl $16, %esp 1692; X86-SSE2-NEXT: pxor %xmm3, %xmm3 1693; X86-SSE2-NEXT: pcmpeqd %xmm3, %xmm1 1694; X86-SSE2-NEXT: pcmpeqd %xmm3, %xmm0 1695; X86-SSE2-NEXT: packssdw %xmm1, %xmm0 1696; X86-SSE2-NEXT: pcmpeqd %xmm3, %xmm2 1697; X86-SSE2-NEXT: pcmpeqd 8(%ebp), %xmm3 1698; X86-SSE2-NEXT: packssdw %xmm3, %xmm2 1699; X86-SSE2-NEXT: packsswb %xmm2, %xmm0 1700; X86-SSE2-NEXT: pmovmskb %xmm0, %eax 1701; X86-SSE2-NEXT: xorb %ah, %al 1702; X86-SSE2-NEXT: setnp %al 1703; X86-SSE2-NEXT: movl %ebp, %esp 1704; X86-SSE2-NEXT: popl %ebp 1705; X86-SSE2-NEXT: retl 1706; 1707; X64-SSE-LABEL: icmp0_v16i32_v16i1: 1708; X64-SSE: # %bb.0: 1709; X64-SSE-NEXT: pxor %xmm4, %xmm4 1710; X64-SSE-NEXT: pcmpeqd %xmm4, %xmm3 1711; X64-SSE-NEXT: pcmpeqd %xmm4, %xmm2 1712; X64-SSE-NEXT: packssdw %xmm3, %xmm2 1713; X64-SSE-NEXT: pcmpeqd %xmm4, %xmm1 1714; X64-SSE-NEXT: pcmpeqd %xmm4, %xmm0 1715; X64-SSE-NEXT: packssdw 
%xmm1, %xmm0 1716; X64-SSE-NEXT: packsswb %xmm2, %xmm0 1717; X64-SSE-NEXT: pmovmskb %xmm0, %eax 1718; X64-SSE-NEXT: xorb %ah, %al 1719; X64-SSE-NEXT: setnp %al 1720; X64-SSE-NEXT: retq 1721; 1722; AVX1-LABEL: icmp0_v16i32_v16i1: 1723; AVX1: # %bb.0: 1724; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1725; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 1726; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2 1727; AVX1-NEXT: vpcmpeqd %xmm3, %xmm1, %xmm1 1728; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 1729; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1730; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2 1731; AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0 1732; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 1733; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 1734; AVX1-NEXT: vpmovmskb %xmm0, %eax 1735; AVX1-NEXT: xorb %ah, %al 1736; AVX1-NEXT: setnp %al 1737; AVX1-NEXT: vzeroupper 1738; AVX1-NEXT: retq 1739; 1740; AVX2-LABEL: icmp0_v16i32_v16i1: 1741; AVX2: # %bb.0: 1742; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 1743; AVX2-NEXT: vpcmpeqd %ymm2, %ymm1, %ymm1 1744; AVX2-NEXT: vpcmpeqd %ymm2, %ymm0, %ymm0 1745; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 1746; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1747; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 1748; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] 1749; AVX2-NEXT: vpmovmskb %xmm0, %eax 1750; AVX2-NEXT: xorb %ah, %al 1751; AVX2-NEXT: setnp %al 1752; AVX2-NEXT: vzeroupper 1753; AVX2-NEXT: retq 1754; 1755; AVX512F-LABEL: icmp0_v16i32_v16i1: 1756; AVX512F: # %bb.0: 1757; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k0 1758; AVX512F-NEXT: kmovw %k0, %eax 1759; AVX512F-NEXT: movl %eax, %ecx 1760; AVX512F-NEXT: shrl $8, %ecx 1761; AVX512F-NEXT: xorb %al, %cl 1762; AVX512F-NEXT: setnp %al 1763; AVX512F-NEXT: vzeroupper 1764; AVX512F-NEXT: retq 1765; 1766; AVX512BW-LABEL: icmp0_v16i32_v16i1: 1767; AVX512BW: # %bb.0: 1768; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0 1769; AVX512BW-NEXT: kmovd %k0, %eax 1770; AVX512BW-NEXT: movl %eax, %ecx 1771; AVX512BW-NEXT: shrl $8, %ecx 1772; AVX512BW-NEXT: 
xorb %al, %cl 1773; AVX512BW-NEXT: setnp %al 1774; AVX512BW-NEXT: vzeroupper 1775; AVX512BW-NEXT: retq 1776; 1777; AVX512VL-LABEL: icmp0_v16i32_v16i1: 1778; AVX512VL: # %bb.0: 1779; AVX512VL-NEXT: vptestnmd %zmm0, %zmm0, %k0 1780; AVX512VL-NEXT: kmovd %k0, %eax 1781; AVX512VL-NEXT: movl %eax, %ecx 1782; AVX512VL-NEXT: shrl $8, %ecx 1783; AVX512VL-NEXT: xorb %al, %cl 1784; AVX512VL-NEXT: setnp %al 1785; AVX512VL-NEXT: vzeroupper 1786; AVX512VL-NEXT: retq 1787 %a = icmp eq <16 x i32> %0, zeroinitializer 1788 %b = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %a) 1789 ret i1 %b 1790} 1791 1792define i1 @icmp0_v32i16_v32i1(<32 x i16>) nounwind { 1793; X86-SSE2-LABEL: icmp0_v32i16_v32i1: 1794; X86-SSE2: # %bb.0: 1795; X86-SSE2-NEXT: pushl %ebp 1796; X86-SSE2-NEXT: movl %esp, %ebp 1797; X86-SSE2-NEXT: andl $-16, %esp 1798; X86-SSE2-NEXT: subl $16, %esp 1799; X86-SSE2-NEXT: pxor %xmm3, %xmm3 1800; X86-SSE2-NEXT: pcmpeqw %xmm3, %xmm1 1801; X86-SSE2-NEXT: pcmpeqw %xmm3, %xmm2 1802; X86-SSE2-NEXT: pcmpeqw %xmm3, %xmm0 1803; X86-SSE2-NEXT: pxor %xmm2, %xmm0 1804; X86-SSE2-NEXT: pcmpeqw 8(%ebp), %xmm3 1805; X86-SSE2-NEXT: pxor %xmm1, %xmm3 1806; X86-SSE2-NEXT: packsswb %xmm3, %xmm0 1807; X86-SSE2-NEXT: pmovmskb %xmm0, %eax 1808; X86-SSE2-NEXT: xorb %ah, %al 1809; X86-SSE2-NEXT: setnp %al 1810; X86-SSE2-NEXT: movl %ebp, %esp 1811; X86-SSE2-NEXT: popl %ebp 1812; X86-SSE2-NEXT: retl 1813; 1814; X64-SSE-LABEL: icmp0_v32i16_v32i1: 1815; X64-SSE: # %bb.0: 1816; X64-SSE-NEXT: pxor %xmm4, %xmm4 1817; X64-SSE-NEXT: pcmpeqw %xmm4, %xmm2 1818; X64-SSE-NEXT: pcmpeqw %xmm4, %xmm0 1819; X64-SSE-NEXT: pxor %xmm2, %xmm0 1820; X64-SSE-NEXT: pcmpeqw %xmm4, %xmm3 1821; X64-SSE-NEXT: pcmpeqw %xmm4, %xmm1 1822; X64-SSE-NEXT: pxor %xmm3, %xmm1 1823; X64-SSE-NEXT: packsswb %xmm1, %xmm0 1824; X64-SSE-NEXT: pmovmskb %xmm0, %eax 1825; X64-SSE-NEXT: xorb %ah, %al 1826; X64-SSE-NEXT: setnp %al 1827; X64-SSE-NEXT: retq 1828; 1829; AVX1-LABEL: icmp0_v32i16_v32i1: 1830; AVX1: # %bb.0: 1831; AVX1-NEXT: 
vpxor %xmm2, %xmm2, %xmm2 1832; AVX1-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm3 1833; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm4 1834; AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm3 1835; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 1836; AVX1-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1 1837; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1838; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0 1839; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 1840; AVX1-NEXT: vpacksswb %xmm0, %xmm3, %xmm0 1841; AVX1-NEXT: vpmovmskb %xmm0, %eax 1842; AVX1-NEXT: xorb %ah, %al 1843; AVX1-NEXT: setnp %al 1844; AVX1-NEXT: vzeroupper 1845; AVX1-NEXT: retq 1846; 1847; AVX2-LABEL: icmp0_v32i16_v32i1: 1848; AVX2: # %bb.0: 1849; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 1850; AVX2-NEXT: vpcmpeqw %ymm2, %ymm1, %ymm1 1851; AVX2-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0 1852; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 1853; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 1854; AVX2-NEXT: vpmovmskb %ymm0, %eax 1855; AVX2-NEXT: movl %eax, %ecx 1856; AVX2-NEXT: shrl $16, %ecx 1857; AVX2-NEXT: xorl %eax, %ecx 1858; AVX2-NEXT: xorb %ch, %cl 1859; AVX2-NEXT: setnp %al 1860; AVX2-NEXT: vzeroupper 1861; AVX2-NEXT: retq 1862; 1863; AVX512F-LABEL: icmp0_v32i16_v32i1: 1864; AVX512F: # %bb.0: 1865; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 1866; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 1867; AVX512F-NEXT: vpcmpeqw %ymm2, %ymm1, %ymm1 1868; AVX512F-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0 1869; AVX512F-NEXT: vpxor %ymm1, %ymm0, %ymm0 1870; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0 1871; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 1872; AVX512F-NEXT: kshiftrw $8, %k0, %k1 1873; AVX512F-NEXT: kxorw %k1, %k0, %k0 1874; AVX512F-NEXT: kshiftrw $4, %k0, %k1 1875; AVX512F-NEXT: kxorw %k1, %k0, %k0 1876; AVX512F-NEXT: kshiftrw $2, %k0, %k1 1877; AVX512F-NEXT: kxorw %k1, %k0, %k0 1878; AVX512F-NEXT: kshiftrw $1, %k0, %k1 1879; AVX512F-NEXT: kxorw %k1, %k0, %k0 1880; AVX512F-NEXT: kmovw %k0, %eax 1881; AVX512F-NEXT: # kill: def $al killed $al killed $eax 1882; AVX512F-NEXT: vzeroupper 1883; 
AVX512F-NEXT: retq 1884; 1885; AVX512BW-LABEL: icmp0_v32i16_v32i1: 1886; AVX512BW: # %bb.0: 1887; AVX512BW-NEXT: vptestnmw %zmm0, %zmm0, %k0 1888; AVX512BW-NEXT: kmovd %k0, %eax 1889; AVX512BW-NEXT: movl %eax, %ecx 1890; AVX512BW-NEXT: shrl $16, %ecx 1891; AVX512BW-NEXT: xorl %eax, %ecx 1892; AVX512BW-NEXT: xorb %ch, %cl 1893; AVX512BW-NEXT: setnp %al 1894; AVX512BW-NEXT: vzeroupper 1895; AVX512BW-NEXT: retq 1896; 1897; AVX512VL-LABEL: icmp0_v32i16_v32i1: 1898; AVX512VL: # %bb.0: 1899; AVX512VL-NEXT: vptestnmw %zmm0, %zmm0, %k0 1900; AVX512VL-NEXT: kmovd %k0, %eax 1901; AVX512VL-NEXT: movl %eax, %ecx 1902; AVX512VL-NEXT: shrl $16, %ecx 1903; AVX512VL-NEXT: xorl %eax, %ecx 1904; AVX512VL-NEXT: xorb %ch, %cl 1905; AVX512VL-NEXT: setnp %al 1906; AVX512VL-NEXT: vzeroupper 1907; AVX512VL-NEXT: retq 1908 %a = icmp eq <32 x i16> %0, zeroinitializer 1909 %b = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> %a) 1910 ret i1 %b 1911} 1912 1913define i1 @icmp0_v64i8_v64i1(<64 x i8>) nounwind { 1914; X86-SSE2-LABEL: icmp0_v64i8_v64i1: 1915; X86-SSE2: # %bb.0: 1916; X86-SSE2-NEXT: pushl %ebp 1917; X86-SSE2-NEXT: movl %esp, %ebp 1918; X86-SSE2-NEXT: andl $-16, %esp 1919; X86-SSE2-NEXT: subl $16, %esp 1920; X86-SSE2-NEXT: pxor %xmm3, %xmm3 1921; X86-SSE2-NEXT: pcmpeqb %xmm3, %xmm1 1922; X86-SSE2-NEXT: pcmpeqb %xmm3, %xmm2 1923; X86-SSE2-NEXT: pcmpeqb %xmm3, %xmm0 1924; X86-SSE2-NEXT: pxor %xmm2, %xmm0 1925; X86-SSE2-NEXT: pcmpeqb 8(%ebp), %xmm3 1926; X86-SSE2-NEXT: pxor %xmm1, %xmm3 1927; X86-SSE2-NEXT: pxor %xmm0, %xmm3 1928; X86-SSE2-NEXT: pmovmskb %xmm3, %eax 1929; X86-SSE2-NEXT: xorb %ah, %al 1930; X86-SSE2-NEXT: setnp %al 1931; X86-SSE2-NEXT: movl %ebp, %esp 1932; X86-SSE2-NEXT: popl %ebp 1933; X86-SSE2-NEXT: retl 1934; 1935; X64-SSE-LABEL: icmp0_v64i8_v64i1: 1936; X64-SSE: # %bb.0: 1937; X64-SSE-NEXT: pxor %xmm4, %xmm4 1938; X64-SSE-NEXT: pcmpeqb %xmm4, %xmm2 1939; X64-SSE-NEXT: pcmpeqb %xmm4, %xmm0 1940; X64-SSE-NEXT: pxor %xmm2, %xmm0 1941; X64-SSE-NEXT: pcmpeqb %xmm4, 
%xmm3 1942; X64-SSE-NEXT: pcmpeqb %xmm4, %xmm1 1943; X64-SSE-NEXT: pxor %xmm3, %xmm1 1944; X64-SSE-NEXT: pxor %xmm0, %xmm1 1945; X64-SSE-NEXT: pmovmskb %xmm1, %eax 1946; X64-SSE-NEXT: xorb %ah, %al 1947; X64-SSE-NEXT: setnp %al 1948; X64-SSE-NEXT: retq 1949; 1950; AVX1-LABEL: icmp0_v64i8_v64i1: 1951; AVX1: # %bb.0: 1952; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 1953; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm3 1954; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm4 1955; AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm3 1956; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 1957; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1 1958; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1959; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0 1960; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 1961; AVX1-NEXT: vpxor %xmm0, %xmm3, %xmm0 1962; AVX1-NEXT: vpmovmskb %xmm0, %eax 1963; AVX1-NEXT: xorb %ah, %al 1964; AVX1-NEXT: setnp %al 1965; AVX1-NEXT: vzeroupper 1966; AVX1-NEXT: retq 1967; 1968; AVX2-LABEL: icmp0_v64i8_v64i1: 1969; AVX2: # %bb.0: 1970; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 1971; AVX2-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1 1972; AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0 1973; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0 1974; AVX2-NEXT: vpmovmskb %ymm0, %eax 1975; AVX2-NEXT: movl %eax, %ecx 1976; AVX2-NEXT: shrl $16, %ecx 1977; AVX2-NEXT: xorl %eax, %ecx 1978; AVX2-NEXT: xorb %ch, %cl 1979; AVX2-NEXT: setnp %al 1980; AVX2-NEXT: vzeroupper 1981; AVX2-NEXT: retq 1982; 1983; AVX512F-LABEL: icmp0_v64i8_v64i1: 1984; AVX512F: # %bb.0: 1985; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 1986; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 1987; AVX512F-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1 1988; AVX512F-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0 1989; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 1990; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm3 1991; AVX512F-NEXT: vpxor %xmm2, %xmm3, %xmm2 1992; AVX512F-NEXT: vpxor %xmm1, %xmm0, %xmm0 1993; AVX512F-NEXT: vpxor %xmm2, %xmm0, %xmm0 1994; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 1995; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 1996; 
AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 1997; AVX512F-NEXT: kshiftrw $8, %k0, %k1 1998; AVX512F-NEXT: kxorw %k1, %k0, %k0 1999; AVX512F-NEXT: kshiftrw $4, %k0, %k1 2000; AVX512F-NEXT: kxorw %k1, %k0, %k0 2001; AVX512F-NEXT: kshiftrw $2, %k0, %k1 2002; AVX512F-NEXT: kxorw %k1, %k0, %k0 2003; AVX512F-NEXT: kshiftrw $1, %k0, %k1 2004; AVX512F-NEXT: kxorw %k1, %k0, %k0 2005; AVX512F-NEXT: kmovw %k0, %eax 2006; AVX512F-NEXT: # kill: def $al killed $al killed $eax 2007; AVX512F-NEXT: vzeroupper 2008; AVX512F-NEXT: retq 2009; 2010; AVX512BW-LABEL: icmp0_v64i8_v64i1: 2011; AVX512BW: # %bb.0: 2012; AVX512BW-NEXT: vptestnmb %zmm0, %zmm0, %k0 2013; AVX512BW-NEXT: kmovq %k0, %rax 2014; AVX512BW-NEXT: movq %rax, %rcx 2015; AVX512BW-NEXT: shrq $32, %rcx 2016; AVX512BW-NEXT: xorl %eax, %ecx 2017; AVX512BW-NEXT: movl %ecx, %eax 2018; AVX512BW-NEXT: shrl $16, %eax 2019; AVX512BW-NEXT: xorl %ecx, %eax 2020; AVX512BW-NEXT: xorb %ah, %al 2021; AVX512BW-NEXT: setnp %al 2022; AVX512BW-NEXT: vzeroupper 2023; AVX512BW-NEXT: retq 2024; 2025; AVX512VL-LABEL: icmp0_v64i8_v64i1: 2026; AVX512VL: # %bb.0: 2027; AVX512VL-NEXT: vptestnmb %zmm0, %zmm0, %k0 2028; AVX512VL-NEXT: kmovq %k0, %rax 2029; AVX512VL-NEXT: movq %rax, %rcx 2030; AVX512VL-NEXT: shrq $32, %rcx 2031; AVX512VL-NEXT: xorl %eax, %ecx 2032; AVX512VL-NEXT: movl %ecx, %eax 2033; AVX512VL-NEXT: shrl $16, %eax 2034; AVX512VL-NEXT: xorl %ecx, %eax 2035; AVX512VL-NEXT: xorb %ah, %al 2036; AVX512VL-NEXT: setnp %al 2037; AVX512VL-NEXT: vzeroupper 2038; AVX512VL-NEXT: retq 2039 %a = icmp eq <64 x i8> %0, zeroinitializer 2040 %b = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> %a) 2041 ret i1 %b 2042} 2043 2044; Comparison 2045; 2046 2047define i1 @icmp_v2i64_v2i1(<2 x i64>, <2 x i64>) nounwind { 2048; SSE2-LABEL: icmp_v2i64_v2i1: 2049; SSE2: # %bb.0: 2050; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 2051; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] 2052; SSE2-NEXT: pand %xmm0, %xmm1 2053; SSE2-NEXT: movmskpd %xmm1, %eax 2054; SSE2-NEXT: testb 
%al, %al 2055; SSE2-NEXT: setnp %al 2056; SSE2-NEXT: ret{{[l|q]}} 2057; 2058; SSE41-LABEL: icmp_v2i64_v2i1: 2059; SSE41: # %bb.0: 2060; SSE41-NEXT: pcmpeqq %xmm1, %xmm0 2061; SSE41-NEXT: movmskpd %xmm0, %eax 2062; SSE41-NEXT: testb %al, %al 2063; SSE41-NEXT: setnp %al 2064; SSE41-NEXT: retq 2065; 2066; AVX-LABEL: icmp_v2i64_v2i1: 2067; AVX: # %bb.0: 2068; AVX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 2069; AVX-NEXT: vmovmskpd %xmm0, %eax 2070; AVX-NEXT: testb %al, %al 2071; AVX-NEXT: setnp %al 2072; AVX-NEXT: retq 2073; 2074; AVX512F-LABEL: icmp_v2i64_v2i1: 2075; AVX512F: # %bb.0: 2076; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 2077; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 2078; AVX512F-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 2079; AVX512F-NEXT: kmovw %k0, %eax 2080; AVX512F-NEXT: testb $3, %al 2081; AVX512F-NEXT: setnp %al 2082; AVX512F-NEXT: vzeroupper 2083; AVX512F-NEXT: retq 2084; 2085; AVX512BW-LABEL: icmp_v2i64_v2i1: 2086; AVX512BW: # %bb.0: 2087; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 2088; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 2089; AVX512BW-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 2090; AVX512BW-NEXT: kmovd %k0, %eax 2091; AVX512BW-NEXT: testb $3, %al 2092; AVX512BW-NEXT: setnp %al 2093; AVX512BW-NEXT: vzeroupper 2094; AVX512BW-NEXT: retq 2095; 2096; AVX512VL-LABEL: icmp_v2i64_v2i1: 2097; AVX512VL: # %bb.0: 2098; AVX512VL-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 2099; AVX512VL-NEXT: kmovd %k0, %eax 2100; AVX512VL-NEXT: testb %al, %al 2101; AVX512VL-NEXT: setnp %al 2102; AVX512VL-NEXT: retq 2103 %a = icmp eq <2 x i64> %0, %1 2104 %b = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> %a) 2105 ret i1 %b 2106} 2107 2108define i1 @icmp_v4i32_v4i1(<4 x i32>, <4 x i32>) nounwind { 2109; SSE-LABEL: icmp_v4i32_v4i1: 2110; SSE: # %bb.0: 2111; SSE-NEXT: pcmpeqd %xmm1, %xmm0 2112; SSE-NEXT: movmskps %xmm0, %eax 2113; SSE-NEXT: testb %al, %al 2114; SSE-NEXT: setnp %al 2115; SSE-NEXT: ret{{[l|q]}} 2116; 2117; AVX-LABEL: icmp_v4i32_v4i1: 
2118; AVX: # %bb.0: 2119; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 2120; AVX-NEXT: vmovmskps %xmm0, %eax 2121; AVX-NEXT: testb %al, %al 2122; AVX-NEXT: setnp %al 2123; AVX-NEXT: retq 2124; 2125; AVX512F-LABEL: icmp_v4i32_v4i1: 2126; AVX512F: # %bb.0: 2127; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 2128; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 2129; AVX512F-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 2130; AVX512F-NEXT: kmovw %k0, %eax 2131; AVX512F-NEXT: testb $15, %al 2132; AVX512F-NEXT: setnp %al 2133; AVX512F-NEXT: vzeroupper 2134; AVX512F-NEXT: retq 2135; 2136; AVX512BW-LABEL: icmp_v4i32_v4i1: 2137; AVX512BW: # %bb.0: 2138; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 2139; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 2140; AVX512BW-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 2141; AVX512BW-NEXT: kmovd %k0, %eax 2142; AVX512BW-NEXT: testb $15, %al 2143; AVX512BW-NEXT: setnp %al 2144; AVX512BW-NEXT: vzeroupper 2145; AVX512BW-NEXT: retq 2146; 2147; AVX512VL-LABEL: icmp_v4i32_v4i1: 2148; AVX512VL: # %bb.0: 2149; AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 2150; AVX512VL-NEXT: kmovd %k0, %eax 2151; AVX512VL-NEXT: testb %al, %al 2152; AVX512VL-NEXT: setnp %al 2153; AVX512VL-NEXT: retq 2154 %a = icmp eq <4 x i32> %0, %1 2155 %b = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %a) 2156 ret i1 %b 2157} 2158 2159define i1 @icmp_v8i16_v8i1(<8 x i16>, <8 x i16>) nounwind { 2160; SSE-LABEL: icmp_v8i16_v8i1: 2161; SSE: # %bb.0: 2162; SSE-NEXT: pcmpeqw %xmm1, %xmm0 2163; SSE-NEXT: packsswb %xmm0, %xmm0 2164; SSE-NEXT: pmovmskb %xmm0, %eax 2165; SSE-NEXT: testb %al, %al 2166; SSE-NEXT: setnp %al 2167; SSE-NEXT: ret{{[l|q]}} 2168; 2169; AVX-LABEL: icmp_v8i16_v8i1: 2170; AVX: # %bb.0: 2171; AVX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 2172; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 2173; AVX-NEXT: vpmovmskb %xmm0, %eax 2174; AVX-NEXT: testb %al, %al 2175; AVX-NEXT: setnp %al 2176; AVX-NEXT: retq 2177; 2178; AVX512F-LABEL: icmp_v8i16_v8i1: 2179; AVX512F: # %bb.0: 
2180; AVX512F-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 2181; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0 2182; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0 2183; AVX512F-NEXT: kmovw %k0, %eax 2184; AVX512F-NEXT: testb %al, %al 2185; AVX512F-NEXT: setnp %al 2186; AVX512F-NEXT: vzeroupper 2187; AVX512F-NEXT: retq 2188; 2189; AVX512BW-LABEL: icmp_v8i16_v8i1: 2190; AVX512BW: # %bb.0: 2191; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 2192; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 2193; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 2194; AVX512BW-NEXT: kmovd %k0, %eax 2195; AVX512BW-NEXT: testb %al, %al 2196; AVX512BW-NEXT: setnp %al 2197; AVX512BW-NEXT: vzeroupper 2198; AVX512BW-NEXT: retq 2199; 2200; AVX512VL-LABEL: icmp_v8i16_v8i1: 2201; AVX512VL: # %bb.0: 2202; AVX512VL-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 2203; AVX512VL-NEXT: kmovd %k0, %eax 2204; AVX512VL-NEXT: testb %al, %al 2205; AVX512VL-NEXT: setnp %al 2206; AVX512VL-NEXT: retq 2207 %a = icmp eq <8 x i16> %0, %1 2208 %b = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a) 2209 ret i1 %b 2210} 2211 2212define i1 @icmp_v16i8_v16i1(<16 x i8>, <16 x i8>) nounwind { 2213; SSE-LABEL: icmp_v16i8_v16i1: 2214; SSE: # %bb.0: 2215; SSE-NEXT: pcmpeqb %xmm1, %xmm0 2216; SSE-NEXT: pmovmskb %xmm0, %eax 2217; SSE-NEXT: xorb %ah, %al 2218; SSE-NEXT: setnp %al 2219; SSE-NEXT: ret{{[l|q]}} 2220; 2221; AVX-LABEL: icmp_v16i8_v16i1: 2222; AVX: # %bb.0: 2223; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 2224; AVX-NEXT: vpmovmskb %xmm0, %eax 2225; AVX-NEXT: xorb %ah, %al 2226; AVX-NEXT: setnp %al 2227; AVX-NEXT: retq 2228; 2229; AVX512F-LABEL: icmp_v16i8_v16i1: 2230; AVX512F: # %bb.0: 2231; AVX512F-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 2232; AVX512F-NEXT: vpmovmskb %xmm0, %eax 2233; AVX512F-NEXT: xorb %ah, %al 2234; AVX512F-NEXT: setnp %al 2235; AVX512F-NEXT: retq 2236; 2237; AVX512BW-LABEL: icmp_v16i8_v16i1: 2238; AVX512BW: # %bb.0: 2239; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 2240; AVX512BW-NEXT: # kill: def $xmm0 killed 
$xmm0 def $zmm0 2241; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 2242; AVX512BW-NEXT: kmovd %k0, %eax 2243; AVX512BW-NEXT: movl %eax, %ecx 2244; AVX512BW-NEXT: shrl $8, %ecx 2245; AVX512BW-NEXT: xorb %al, %cl 2246; AVX512BW-NEXT: setnp %al 2247; AVX512BW-NEXT: vzeroupper 2248; AVX512BW-NEXT: retq 2249; 2250; AVX512VL-LABEL: icmp_v16i8_v16i1: 2251; AVX512VL: # %bb.0: 2252; AVX512VL-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 2253; AVX512VL-NEXT: kmovd %k0, %eax 2254; AVX512VL-NEXT: movl %eax, %ecx 2255; AVX512VL-NEXT: shrl $8, %ecx 2256; AVX512VL-NEXT: xorb %al, %cl 2257; AVX512VL-NEXT: setnp %al 2258; AVX512VL-NEXT: retq 2259 %a = icmp eq <16 x i8> %0, %1 2260 %b = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %a) 2261 ret i1 %b 2262} 2263 2264define i1 @icmp_v4i64_v4i1(<4 x i64>, <4 x i64>) nounwind { 2265; X86-SSE2-LABEL: icmp_v4i64_v4i1: 2266; X86-SSE2: # %bb.0: 2267; X86-SSE2-NEXT: pushl %ebp 2268; X86-SSE2-NEXT: movl %esp, %ebp 2269; X86-SSE2-NEXT: andl $-16, %esp 2270; X86-SSE2-NEXT: subl $16, %esp 2271; X86-SSE2-NEXT: pcmpeqd %xmm2, %xmm0 2272; X86-SSE2-NEXT: pcmpeqd 8(%ebp), %xmm1 2273; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 2274; X86-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,3],xmm1[1,3] 2275; X86-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] 2276; X86-SSE2-NEXT: andps %xmm2, %xmm0 2277; X86-SSE2-NEXT: movmskps %xmm0, %eax 2278; X86-SSE2-NEXT: testb %al, %al 2279; X86-SSE2-NEXT: setnp %al 2280; X86-SSE2-NEXT: movl %ebp, %esp 2281; X86-SSE2-NEXT: popl %ebp 2282; X86-SSE2-NEXT: retl 2283; 2284; X64-SSE2-LABEL: icmp_v4i64_v4i1: 2285; X64-SSE2: # %bb.0: 2286; X64-SSE2-NEXT: pcmpeqd %xmm3, %xmm1 2287; X64-SSE2-NEXT: pcmpeqd %xmm2, %xmm0 2288; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 2289; X64-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,3],xmm1[1,3] 2290; X64-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] 2291; X64-SSE2-NEXT: andps %xmm2, %xmm0 2292; X64-SSE2-NEXT: movmskps %xmm0, %eax 2293; X64-SSE2-NEXT: testb %al, %al 2294; X64-SSE2-NEXT: setnp %al 2295; 
X64-SSE2-NEXT: retq 2296; 2297; SSE41-LABEL: icmp_v4i64_v4i1: 2298; SSE41: # %bb.0: 2299; SSE41-NEXT: pcmpeqq %xmm3, %xmm1 2300; SSE41-NEXT: pcmpeqq %xmm2, %xmm0 2301; SSE41-NEXT: packssdw %xmm1, %xmm0 2302; SSE41-NEXT: movmskps %xmm0, %eax 2303; SSE41-NEXT: testb %al, %al 2304; SSE41-NEXT: setnp %al 2305; SSE41-NEXT: retq 2306; 2307; AVX1-LABEL: icmp_v4i64_v4i1: 2308; AVX1: # %bb.0: 2309; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 2310; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 2311; AVX1-NEXT: vpcmpeqq %xmm2, %xmm3, %xmm2 2312; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 2313; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2314; AVX1-NEXT: vmovmskpd %ymm0, %eax 2315; AVX1-NEXT: testb %al, %al 2316; AVX1-NEXT: setnp %al 2317; AVX1-NEXT: vzeroupper 2318; AVX1-NEXT: retq 2319; 2320; AVX2-LABEL: icmp_v4i64_v4i1: 2321; AVX2: # %bb.0: 2322; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 2323; AVX2-NEXT: vmovmskpd %ymm0, %eax 2324; AVX2-NEXT: testb %al, %al 2325; AVX2-NEXT: setnp %al 2326; AVX2-NEXT: vzeroupper 2327; AVX2-NEXT: retq 2328; 2329; AVX512F-LABEL: icmp_v4i64_v4i1: 2330; AVX512F: # %bb.0: 2331; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 2332; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 2333; AVX512F-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 2334; AVX512F-NEXT: kmovw %k0, %eax 2335; AVX512F-NEXT: testb $15, %al 2336; AVX512F-NEXT: setnp %al 2337; AVX512F-NEXT: vzeroupper 2338; AVX512F-NEXT: retq 2339; 2340; AVX512BW-LABEL: icmp_v4i64_v4i1: 2341; AVX512BW: # %bb.0: 2342; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 2343; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 2344; AVX512BW-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 2345; AVX512BW-NEXT: kmovd %k0, %eax 2346; AVX512BW-NEXT: testb $15, %al 2347; AVX512BW-NEXT: setnp %al 2348; AVX512BW-NEXT: vzeroupper 2349; AVX512BW-NEXT: retq 2350; 2351; AVX512VL-LABEL: icmp_v4i64_v4i1: 2352; AVX512VL: # %bb.0: 2353; AVX512VL-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 2354; AVX512VL-NEXT: kmovd %k0, %eax 2355; 
AVX512VL-NEXT: testb %al, %al 2356; AVX512VL-NEXT: setnp %al 2357; AVX512VL-NEXT: vzeroupper 2358; AVX512VL-NEXT: retq 2359 %a = icmp eq <4 x i64> %0, %1 2360 %b = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %a) 2361 ret i1 %b 2362} 2363 2364define i1 @icmp_v8i32_v8i1(<8 x i32>, <8 x i32>) nounwind { 2365; X86-SSE2-LABEL: icmp_v8i32_v8i1: 2366; X86-SSE2: # %bb.0: 2367; X86-SSE2-NEXT: pushl %ebp 2368; X86-SSE2-NEXT: movl %esp, %ebp 2369; X86-SSE2-NEXT: andl $-16, %esp 2370; X86-SSE2-NEXT: subl $16, %esp 2371; X86-SSE2-NEXT: pcmpeqd %xmm2, %xmm0 2372; X86-SSE2-NEXT: pcmpeqd 8(%ebp), %xmm1 2373; X86-SSE2-NEXT: packssdw %xmm1, %xmm0 2374; X86-SSE2-NEXT: packsswb %xmm0, %xmm0 2375; X86-SSE2-NEXT: pmovmskb %xmm0, %eax 2376; X86-SSE2-NEXT: testb %al, %al 2377; X86-SSE2-NEXT: setnp %al 2378; X86-SSE2-NEXT: movl %ebp, %esp 2379; X86-SSE2-NEXT: popl %ebp 2380; X86-SSE2-NEXT: retl 2381; 2382; X64-SSE-LABEL: icmp_v8i32_v8i1: 2383; X64-SSE: # %bb.0: 2384; X64-SSE-NEXT: pcmpeqd %xmm3, %xmm1 2385; X64-SSE-NEXT: pcmpeqd %xmm2, %xmm0 2386; X64-SSE-NEXT: packssdw %xmm1, %xmm0 2387; X64-SSE-NEXT: packsswb %xmm0, %xmm0 2388; X64-SSE-NEXT: pmovmskb %xmm0, %eax 2389; X64-SSE-NEXT: testb %al, %al 2390; X64-SSE-NEXT: setnp %al 2391; X64-SSE-NEXT: retq 2392; 2393; AVX1-LABEL: icmp_v8i32_v8i1: 2394; AVX1: # %bb.0: 2395; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 2396; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 2397; AVX1-NEXT: vpcmpeqd %xmm2, %xmm3, %xmm2 2398; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 2399; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2400; AVX1-NEXT: vmovmskps %ymm0, %eax 2401; AVX1-NEXT: testb %al, %al 2402; AVX1-NEXT: setnp %al 2403; AVX1-NEXT: vzeroupper 2404; AVX1-NEXT: retq 2405; 2406; AVX2-LABEL: icmp_v8i32_v8i1: 2407; AVX2: # %bb.0: 2408; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 2409; AVX2-NEXT: vmovmskps %ymm0, %eax 2410; AVX2-NEXT: testb %al, %al 2411; AVX2-NEXT: setnp %al 2412; AVX2-NEXT: vzeroupper 2413; AVX2-NEXT: retq 2414; 2415; AVX512F-LABEL: icmp_v8i32_v8i1: 
2416; AVX512F: # %bb.0: 2417; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 2418; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 2419; AVX512F-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 2420; AVX512F-NEXT: kmovw %k0, %eax 2421; AVX512F-NEXT: testb %al, %al 2422; AVX512F-NEXT: setnp %al 2423; AVX512F-NEXT: vzeroupper 2424; AVX512F-NEXT: retq 2425; 2426; AVX512BW-LABEL: icmp_v8i32_v8i1: 2427; AVX512BW: # %bb.0: 2428; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 2429; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 2430; AVX512BW-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 2431; AVX512BW-NEXT: kmovd %k0, %eax 2432; AVX512BW-NEXT: testb %al, %al 2433; AVX512BW-NEXT: setnp %al 2434; AVX512BW-NEXT: vzeroupper 2435; AVX512BW-NEXT: retq 2436; 2437; AVX512VL-LABEL: icmp_v8i32_v8i1: 2438; AVX512VL: # %bb.0: 2439; AVX512VL-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 2440; AVX512VL-NEXT: kmovd %k0, %eax 2441; AVX512VL-NEXT: testb %al, %al 2442; AVX512VL-NEXT: setnp %al 2443; AVX512VL-NEXT: vzeroupper 2444; AVX512VL-NEXT: retq 2445 %a = icmp eq <8 x i32> %0, %1 2446 %b = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a) 2447 ret i1 %b 2448} 2449 2450define i1 @icmp_v16i16_v16i1(<16 x i16>, <16 x i16>) nounwind { 2451; X86-SSE2-LABEL: icmp_v16i16_v16i1: 2452; X86-SSE2: # %bb.0: 2453; X86-SSE2-NEXT: pushl %ebp 2454; X86-SSE2-NEXT: movl %esp, %ebp 2455; X86-SSE2-NEXT: andl $-16, %esp 2456; X86-SSE2-NEXT: subl $16, %esp 2457; X86-SSE2-NEXT: pcmpeqw %xmm2, %xmm0 2458; X86-SSE2-NEXT: pcmpeqw 8(%ebp), %xmm1 2459; X86-SSE2-NEXT: packsswb %xmm1, %xmm0 2460; X86-SSE2-NEXT: pmovmskb %xmm0, %eax 2461; X86-SSE2-NEXT: xorb %ah, %al 2462; X86-SSE2-NEXT: setnp %al 2463; X86-SSE2-NEXT: movl %ebp, %esp 2464; X86-SSE2-NEXT: popl %ebp 2465; X86-SSE2-NEXT: retl 2466; 2467; X64-SSE-LABEL: icmp_v16i16_v16i1: 2468; X64-SSE: # %bb.0: 2469; X64-SSE-NEXT: pcmpeqw %xmm3, %xmm1 2470; X64-SSE-NEXT: pcmpeqw %xmm2, %xmm0 2471; X64-SSE-NEXT: packsswb %xmm1, %xmm0 2472; X64-SSE-NEXT: pmovmskb %xmm0, %eax 
2473; X64-SSE-NEXT: xorb %ah, %al 2474; X64-SSE-NEXT: setnp %al 2475; X64-SSE-NEXT: retq 2476; 2477; AVX1-LABEL: icmp_v16i16_v16i1: 2478; AVX1: # %bb.0: 2479; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 2480; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 2481; AVX1-NEXT: vpcmpeqw %xmm2, %xmm3, %xmm2 2482; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 2483; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0 2484; AVX1-NEXT: vpmovmskb %xmm0, %eax 2485; AVX1-NEXT: xorb %ah, %al 2486; AVX1-NEXT: setnp %al 2487; AVX1-NEXT: vzeroupper 2488; AVX1-NEXT: retq 2489; 2490; AVX2-LABEL: icmp_v16i16_v16i1: 2491; AVX2: # %bb.0: 2492; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 2493; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 2494; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 2495; AVX2-NEXT: vpmovmskb %xmm0, %eax 2496; AVX2-NEXT: xorb %ah, %al 2497; AVX2-NEXT: setnp %al 2498; AVX2-NEXT: vzeroupper 2499; AVX2-NEXT: retq 2500; 2501; AVX512F-LABEL: icmp_v16i16_v16i1: 2502; AVX512F: # %bb.0: 2503; AVX512F-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 2504; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0 2505; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 2506; AVX512F-NEXT: kmovw %k0, %eax 2507; AVX512F-NEXT: movl %eax, %ecx 2508; AVX512F-NEXT: shrl $8, %ecx 2509; AVX512F-NEXT: xorb %al, %cl 2510; AVX512F-NEXT: setnp %al 2511; AVX512F-NEXT: vzeroupper 2512; AVX512F-NEXT: retq 2513; 2514; AVX512BW-LABEL: icmp_v16i16_v16i1: 2515; AVX512BW: # %bb.0: 2516; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 2517; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 2518; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 2519; AVX512BW-NEXT: kmovd %k0, %eax 2520; AVX512BW-NEXT: movl %eax, %ecx 2521; AVX512BW-NEXT: shrl $8, %ecx 2522; AVX512BW-NEXT: xorb %al, %cl 2523; AVX512BW-NEXT: setnp %al 2524; AVX512BW-NEXT: vzeroupper 2525; AVX512BW-NEXT: retq 2526; 2527; AVX512VL-LABEL: icmp_v16i16_v16i1: 2528; AVX512VL: # %bb.0: 2529; AVX512VL-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 2530; AVX512VL-NEXT: kmovd %k0, %eax 2531; AVX512VL-NEXT: movl %eax, %ecx 2532; 
AVX512VL-NEXT: shrl $8, %ecx 2533; AVX512VL-NEXT: xorb %al, %cl 2534; AVX512VL-NEXT: setnp %al 2535; AVX512VL-NEXT: vzeroupper 2536; AVX512VL-NEXT: retq 2537 %a = icmp eq <16 x i16> %0, %1 2538 %b = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %a) 2539 ret i1 %b 2540} 2541 2542define i1 @icmp_v32i8_v32i1(<32 x i8>, <32 x i8>) nounwind { 2543; X86-SSE2-LABEL: icmp_v32i8_v32i1: 2544; X86-SSE2: # %bb.0: 2545; X86-SSE2-NEXT: pushl %ebp 2546; X86-SSE2-NEXT: movl %esp, %ebp 2547; X86-SSE2-NEXT: andl $-16, %esp 2548; X86-SSE2-NEXT: subl $16, %esp 2549; X86-SSE2-NEXT: pcmpeqb %xmm2, %xmm0 2550; X86-SSE2-NEXT: pcmpeqb 8(%ebp), %xmm1 2551; X86-SSE2-NEXT: pxor %xmm0, %xmm1 2552; X86-SSE2-NEXT: pmovmskb %xmm1, %eax 2553; X86-SSE2-NEXT: xorb %ah, %al 2554; X86-SSE2-NEXT: setnp %al 2555; X86-SSE2-NEXT: movl %ebp, %esp 2556; X86-SSE2-NEXT: popl %ebp 2557; X86-SSE2-NEXT: retl 2558; 2559; X64-SSE-LABEL: icmp_v32i8_v32i1: 2560; X64-SSE: # %bb.0: 2561; X64-SSE-NEXT: pcmpeqb %xmm3, %xmm1 2562; X64-SSE-NEXT: pcmpeqb %xmm2, %xmm0 2563; X64-SSE-NEXT: pxor %xmm1, %xmm0 2564; X64-SSE-NEXT: pmovmskb %xmm0, %eax 2565; X64-SSE-NEXT: xorb %ah, %al 2566; X64-SSE-NEXT: setnp %al 2567; X64-SSE-NEXT: retq 2568; 2569; AVX1-LABEL: icmp_v32i8_v32i1: 2570; AVX1: # %bb.0: 2571; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 2572; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 2573; AVX1-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm2 2574; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 2575; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0 2576; AVX1-NEXT: vpmovmskb %xmm0, %eax 2577; AVX1-NEXT: xorb %ah, %al 2578; AVX1-NEXT: setnp %al 2579; AVX1-NEXT: vzeroupper 2580; AVX1-NEXT: retq 2581; 2582; AVX2-LABEL: icmp_v32i8_v32i1: 2583; AVX2: # %bb.0: 2584; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 2585; AVX2-NEXT: vpmovmskb %ymm0, %eax 2586; AVX2-NEXT: movl %eax, %ecx 2587; AVX2-NEXT: shrl $16, %ecx 2588; AVX2-NEXT: xorl %eax, %ecx 2589; AVX2-NEXT: xorb %ch, %cl 2590; AVX2-NEXT: setnp %al 2591; AVX2-NEXT: vzeroupper 2592; AVX2-NEXT: retq 2593; 2594; 
AVX512F-LABEL: icmp_v32i8_v32i1: 2595; AVX512F: # %bb.0: 2596; AVX512F-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 2597; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 2598; AVX512F-NEXT: vpxor %xmm1, %xmm0, %xmm0 2599; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 2600; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 2601; AVX512F-NEXT: kshiftrw $8, %k0, %k1 2602; AVX512F-NEXT: kxorw %k1, %k0, %k0 2603; AVX512F-NEXT: kshiftrw $4, %k0, %k1 2604; AVX512F-NEXT: kxorw %k1, %k0, %k0 2605; AVX512F-NEXT: kshiftrw $2, %k0, %k1 2606; AVX512F-NEXT: kxorw %k1, %k0, %k0 2607; AVX512F-NEXT: kshiftrw $1, %k0, %k1 2608; AVX512F-NEXT: kxorw %k1, %k0, %k0 2609; AVX512F-NEXT: kmovw %k0, %eax 2610; AVX512F-NEXT: # kill: def $al killed $al killed $eax 2611; AVX512F-NEXT: vzeroupper 2612; AVX512F-NEXT: retq 2613; 2614; AVX512BW-LABEL: icmp_v32i8_v32i1: 2615; AVX512BW: # %bb.0: 2616; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 2617; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 2618; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 2619; AVX512BW-NEXT: kmovd %k0, %eax 2620; AVX512BW-NEXT: movl %eax, %ecx 2621; AVX512BW-NEXT: shrl $16, %ecx 2622; AVX512BW-NEXT: xorl %eax, %ecx 2623; AVX512BW-NEXT: xorb %ch, %cl 2624; AVX512BW-NEXT: setnp %al 2625; AVX512BW-NEXT: vzeroupper 2626; AVX512BW-NEXT: retq 2627; 2628; AVX512VL-LABEL: icmp_v32i8_v32i1: 2629; AVX512VL: # %bb.0: 2630; AVX512VL-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 2631; AVX512VL-NEXT: kmovd %k0, %eax 2632; AVX512VL-NEXT: movl %eax, %ecx 2633; AVX512VL-NEXT: shrl $16, %ecx 2634; AVX512VL-NEXT: xorl %eax, %ecx 2635; AVX512VL-NEXT: xorb %ch, %cl 2636; AVX512VL-NEXT: setnp %al 2637; AVX512VL-NEXT: vzeroupper 2638; AVX512VL-NEXT: retq 2639 %a = icmp eq <32 x i8> %0, %1 2640 %b = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> %a) 2641 ret i1 %b 2642} 2643 2644define i1 @icmp_v8i64_v8i1(<8 x i64>, <8 x i64>) nounwind { 2645; X86-SSE2-LABEL: icmp_v8i64_v8i1: 2646; X86-SSE2: # %bb.0: 2647; X86-SSE2-NEXT: pushl %ebp 2648; X86-SSE2-NEXT: movl %esp, %ebp 
2649; X86-SSE2-NEXT: andl $-16, %esp 2650; X86-SSE2-NEXT: subl $16, %esp 2651; X86-SSE2-NEXT: movdqa 8(%ebp), %xmm3 2652; X86-SSE2-NEXT: pcmpeqd 72(%ebp), %xmm3 2653; X86-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,0,3,2] 2654; X86-SSE2-NEXT: pand %xmm3, %xmm4 2655; X86-SSE2-NEXT: pcmpeqd 56(%ebp), %xmm2 2656; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,0,3,2] 2657; X86-SSE2-NEXT: pand %xmm2, %xmm3 2658; X86-SSE2-NEXT: packssdw %xmm4, %xmm3 2659; X86-SSE2-NEXT: pcmpeqd 40(%ebp), %xmm1 2660; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2] 2661; X86-SSE2-NEXT: pand %xmm1, %xmm2 2662; X86-SSE2-NEXT: pcmpeqd 24(%ebp), %xmm0 2663; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] 2664; X86-SSE2-NEXT: pand %xmm0, %xmm1 2665; X86-SSE2-NEXT: packssdw %xmm2, %xmm1 2666; X86-SSE2-NEXT: packssdw %xmm3, %xmm1 2667; X86-SSE2-NEXT: packsswb %xmm1, %xmm1 2668; X86-SSE2-NEXT: pmovmskb %xmm1, %eax 2669; X86-SSE2-NEXT: testb %al, %al 2670; X86-SSE2-NEXT: setnp %al 2671; X86-SSE2-NEXT: movl %ebp, %esp 2672; X86-SSE2-NEXT: popl %ebp 2673; X86-SSE2-NEXT: retl 2674; 2675; X64-SSE2-LABEL: icmp_v8i64_v8i1: 2676; X64-SSE2: # %bb.0: 2677; X64-SSE2-NEXT: pcmpeqd %xmm7, %xmm3 2678; X64-SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm3[1,0,3,2] 2679; X64-SSE2-NEXT: pand %xmm3, %xmm7 2680; X64-SSE2-NEXT: pcmpeqd %xmm6, %xmm2 2681; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,0,3,2] 2682; X64-SSE2-NEXT: pand %xmm2, %xmm3 2683; X64-SSE2-NEXT: packssdw %xmm7, %xmm3 2684; X64-SSE2-NEXT: pcmpeqd %xmm5, %xmm1 2685; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2] 2686; X64-SSE2-NEXT: pand %xmm1, %xmm2 2687; X64-SSE2-NEXT: pcmpeqd %xmm4, %xmm0 2688; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] 2689; X64-SSE2-NEXT: pand %xmm0, %xmm1 2690; X64-SSE2-NEXT: packssdw %xmm2, %xmm1 2691; X64-SSE2-NEXT: packssdw %xmm3, %xmm1 2692; X64-SSE2-NEXT: packsswb %xmm1, %xmm1 2693; X64-SSE2-NEXT: pmovmskb %xmm1, %eax 2694; X64-SSE2-NEXT: testb %al, %al 2695; X64-SSE2-NEXT: setnp %al 2696; X64-SSE2-NEXT: retq 2697; 
2698; SSE41-LABEL: icmp_v8i64_v8i1: 2699; SSE41: # %bb.0: 2700; SSE41-NEXT: pcmpeqq %xmm7, %xmm3 2701; SSE41-NEXT: pcmpeqq %xmm6, %xmm2 2702; SSE41-NEXT: packssdw %xmm3, %xmm2 2703; SSE41-NEXT: pcmpeqq %xmm5, %xmm1 2704; SSE41-NEXT: pcmpeqq %xmm4, %xmm0 2705; SSE41-NEXT: packssdw %xmm1, %xmm0 2706; SSE41-NEXT: packssdw %xmm2, %xmm0 2707; SSE41-NEXT: packsswb %xmm0, %xmm0 2708; SSE41-NEXT: pmovmskb %xmm0, %eax 2709; SSE41-NEXT: testb %al, %al 2710; SSE41-NEXT: setnp %al 2711; SSE41-NEXT: retq 2712; 2713; AVX1-LABEL: icmp_v8i64_v8i1: 2714; AVX1: # %bb.0: 2715; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4 2716; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5 2717; AVX1-NEXT: vpcmpeqq %xmm4, %xmm5, %xmm4 2718; AVX1-NEXT: vpcmpeqq %xmm3, %xmm1, %xmm1 2719; AVX1-NEXT: vpackssdw %xmm4, %xmm1, %xmm1 2720; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3 2721; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 2722; AVX1-NEXT: vpcmpeqq %xmm3, %xmm4, %xmm3 2723; AVX1-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0 2724; AVX1-NEXT: vpackssdw %xmm3, %xmm0, %xmm0 2725; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2726; AVX1-NEXT: vmovmskps %ymm0, %eax 2727; AVX1-NEXT: testb %al, %al 2728; AVX1-NEXT: setnp %al 2729; AVX1-NEXT: vzeroupper 2730; AVX1-NEXT: retq 2731; 2732; AVX2-LABEL: icmp_v8i64_v8i1: 2733; AVX2: # %bb.0: 2734; AVX2-NEXT: vpcmpeqq %ymm3, %ymm1, %ymm1 2735; AVX2-NEXT: vpcmpeqq %ymm2, %ymm0, %ymm0 2736; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 2737; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 2738; AVX2-NEXT: vmovmskps %ymm0, %eax 2739; AVX2-NEXT: testb %al, %al 2740; AVX2-NEXT: setnp %al 2741; AVX2-NEXT: vzeroupper 2742; AVX2-NEXT: retq 2743; 2744; AVX512F-LABEL: icmp_v8i64_v8i1: 2745; AVX512F: # %bb.0: 2746; AVX512F-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 2747; AVX512F-NEXT: kmovw %k0, %eax 2748; AVX512F-NEXT: testb %al, %al 2749; AVX512F-NEXT: setnp %al 2750; AVX512F-NEXT: vzeroupper 2751; AVX512F-NEXT: retq 2752; 2753; AVX512BW-LABEL: icmp_v8i64_v8i1: 2754; AVX512BW: # %bb.0: 2755; AVX512BW-NEXT: 
vpcmpeqq %zmm1, %zmm0, %k0 2756; AVX512BW-NEXT: kmovd %k0, %eax 2757; AVX512BW-NEXT: testb %al, %al 2758; AVX512BW-NEXT: setnp %al 2759; AVX512BW-NEXT: vzeroupper 2760; AVX512BW-NEXT: retq 2761; 2762; AVX512VL-LABEL: icmp_v8i64_v8i1: 2763; AVX512VL: # %bb.0: 2764; AVX512VL-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 2765; AVX512VL-NEXT: kmovd %k0, %eax 2766; AVX512VL-NEXT: testb %al, %al 2767; AVX512VL-NEXT: setnp %al 2768; AVX512VL-NEXT: vzeroupper 2769; AVX512VL-NEXT: retq 2770 %a = icmp eq <8 x i64> %0, %1 2771 %b = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a) 2772 ret i1 %b 2773} 2774 2775define i1 @icmp_v16i32_v16i1(<16 x i32>, <16 x i32>) nounwind { 2776; X86-SSE2-LABEL: icmp_v16i32_v16i1: 2777; X86-SSE2: # %bb.0: 2778; X86-SSE2-NEXT: pushl %ebp 2779; X86-SSE2-NEXT: movl %esp, %ebp 2780; X86-SSE2-NEXT: andl $-16, %esp 2781; X86-SSE2-NEXT: subl $16, %esp 2782; X86-SSE2-NEXT: movdqa 8(%ebp), %xmm3 2783; X86-SSE2-NEXT: pcmpeqd 72(%ebp), %xmm3 2784; X86-SSE2-NEXT: pcmpeqd 56(%ebp), %xmm2 2785; X86-SSE2-NEXT: packssdw %xmm3, %xmm2 2786; X86-SSE2-NEXT: pcmpeqd 40(%ebp), %xmm1 2787; X86-SSE2-NEXT: pcmpeqd 24(%ebp), %xmm0 2788; X86-SSE2-NEXT: packssdw %xmm1, %xmm0 2789; X86-SSE2-NEXT: packsswb %xmm2, %xmm0 2790; X86-SSE2-NEXT: pmovmskb %xmm0, %eax 2791; X86-SSE2-NEXT: xorb %ah, %al 2792; X86-SSE2-NEXT: setnp %al 2793; X86-SSE2-NEXT: movl %ebp, %esp 2794; X86-SSE2-NEXT: popl %ebp 2795; X86-SSE2-NEXT: retl 2796; 2797; X64-SSE-LABEL: icmp_v16i32_v16i1: 2798; X64-SSE: # %bb.0: 2799; X64-SSE-NEXT: pcmpeqd %xmm7, %xmm3 2800; X64-SSE-NEXT: pcmpeqd %xmm6, %xmm2 2801; X64-SSE-NEXT: packssdw %xmm3, %xmm2 2802; X64-SSE-NEXT: pcmpeqd %xmm5, %xmm1 2803; X64-SSE-NEXT: pcmpeqd %xmm4, %xmm0 2804; X64-SSE-NEXT: packssdw %xmm1, %xmm0 2805; X64-SSE-NEXT: packsswb %xmm2, %xmm0 2806; X64-SSE-NEXT: pmovmskb %xmm0, %eax 2807; X64-SSE-NEXT: xorb %ah, %al 2808; X64-SSE-NEXT: setnp %al 2809; X64-SSE-NEXT: retq 2810; 2811; AVX1-LABEL: icmp_v16i32_v16i1: 2812; AVX1: # %bb.0: 2813; AVX1-NEXT: 
vextractf128 $1, %ymm3, %xmm4 2814; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5 2815; AVX1-NEXT: vpcmpeqd %xmm4, %xmm5, %xmm4 2816; AVX1-NEXT: vpcmpeqd %xmm3, %xmm1, %xmm1 2817; AVX1-NEXT: vpackssdw %xmm4, %xmm1, %xmm1 2818; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3 2819; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 2820; AVX1-NEXT: vpcmpeqd %xmm3, %xmm4, %xmm3 2821; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 2822; AVX1-NEXT: vpackssdw %xmm3, %xmm0, %xmm0 2823; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 2824; AVX1-NEXT: vpmovmskb %xmm0, %eax 2825; AVX1-NEXT: xorb %ah, %al 2826; AVX1-NEXT: setnp %al 2827; AVX1-NEXT: vzeroupper 2828; AVX1-NEXT: retq 2829; 2830; AVX2-LABEL: icmp_v16i32_v16i1: 2831; AVX2: # %bb.0: 2832; AVX2-NEXT: vpcmpeqd %ymm3, %ymm1, %ymm1 2833; AVX2-NEXT: vpcmpeqd %ymm2, %ymm0, %ymm0 2834; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 2835; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 2836; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 2837; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] 2838; AVX2-NEXT: vpmovmskb %xmm0, %eax 2839; AVX2-NEXT: xorb %ah, %al 2840; AVX2-NEXT: setnp %al 2841; AVX2-NEXT: vzeroupper 2842; AVX2-NEXT: retq 2843; 2844; AVX512F-LABEL: icmp_v16i32_v16i1: 2845; AVX512F: # %bb.0: 2846; AVX512F-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 2847; AVX512F-NEXT: kmovw %k0, %eax 2848; AVX512F-NEXT: movl %eax, %ecx 2849; AVX512F-NEXT: shrl $8, %ecx 2850; AVX512F-NEXT: xorb %al, %cl 2851; AVX512F-NEXT: setnp %al 2852; AVX512F-NEXT: vzeroupper 2853; AVX512F-NEXT: retq 2854; 2855; AVX512BW-LABEL: icmp_v16i32_v16i1: 2856; AVX512BW: # %bb.0: 2857; AVX512BW-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 2858; AVX512BW-NEXT: kmovd %k0, %eax 2859; AVX512BW-NEXT: movl %eax, %ecx 2860; AVX512BW-NEXT: shrl $8, %ecx 2861; AVX512BW-NEXT: xorb %al, %cl 2862; AVX512BW-NEXT: setnp %al 2863; AVX512BW-NEXT: vzeroupper 2864; AVX512BW-NEXT: retq 2865; 2866; AVX512VL-LABEL: icmp_v16i32_v16i1: 2867; AVX512VL: # %bb.0: 2868; AVX512VL-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 2869; AVX512VL-NEXT: kmovd %k0, %eax 
; AVX512VL-NEXT:    movl %eax, %ecx
; AVX512VL-NEXT:    shrl $8, %ecx
; AVX512VL-NEXT:    xorb %al, %cl
; AVX512VL-NEXT:    setnp %al
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
  %a = icmp eq <16 x i32> %0, %1
  %b = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %a)
  ret i1 %b
}

; Lane-wise equality compare of two <32 x i16> vectors; the resulting
; <32 x i1> mask is XOR-reduced to a single i1, i.e. the parity of the
; number of equal lanes. The assertion lines inside the function are
; autogenerated and must be regenerated, not hand-edited.
define i1 @icmp_v32i16_v32i1(<32 x i16>, <32 x i16>) nounwind {
; X86-SSE2-LABEL: icmp_v32i16_v32i1:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %ebp
; X86-SSE2-NEXT:    movl %esp, %ebp
; X86-SSE2-NEXT:    andl $-16, %esp
; X86-SSE2-NEXT:    subl $16, %esp
; X86-SSE2-NEXT:    movdqa 8(%ebp), %xmm3
; X86-SSE2-NEXT:    pcmpeqw 56(%ebp), %xmm2
; X86-SSE2-NEXT:    pcmpeqw 24(%ebp), %xmm0
; X86-SSE2-NEXT:    pxor %xmm2, %xmm0
; X86-SSE2-NEXT:    pcmpeqw 72(%ebp), %xmm3
; X86-SSE2-NEXT:    pcmpeqw 40(%ebp), %xmm1
; X86-SSE2-NEXT:    pxor %xmm3, %xmm1
; X86-SSE2-NEXT:    packsswb %xmm1, %xmm0
; X86-SSE2-NEXT:    pmovmskb %xmm0, %eax
; X86-SSE2-NEXT:    xorb %ah, %al
; X86-SSE2-NEXT:    setnp %al
; X86-SSE2-NEXT:    movl %ebp, %esp
; X86-SSE2-NEXT:    popl %ebp
; X86-SSE2-NEXT:    retl
;
; X64-SSE-LABEL: icmp_v32i16_v32i1:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    pcmpeqw %xmm6, %xmm2
; X64-SSE-NEXT:    pcmpeqw %xmm4, %xmm0
; X64-SSE-NEXT:    pxor %xmm2, %xmm0
; X64-SSE-NEXT:    pcmpeqw %xmm7, %xmm3
; X64-SSE-NEXT:    pcmpeqw %xmm5, %xmm1
; X64-SSE-NEXT:    pxor %xmm3, %xmm1
; X64-SSE-NEXT:    packsswb %xmm1, %xmm0
; X64-SSE-NEXT:    pmovmskb %xmm0, %eax
; X64-SSE-NEXT:    xorb %ah, %al
; X64-SSE-NEXT:    setnp %al
; X64-SSE-NEXT:    retq
;
; AVX1-LABEL: icmp_v32i16_v32i1:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm1, %xmm4
; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm0, %xmm5
; AVX1-NEXT:    vpxor %xmm4, %xmm5, %xmm4
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm0, %xmm4, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    xorb %ah, %al
; AVX1-NEXT:    setnp %al
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: icmp_v32i16_v32i1:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqw %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    vpmovmskb %ymm0, %eax
; AVX2-NEXT:    movl %eax, %ecx
; AVX2-NEXT:    shrl $16, %ecx
; AVX2-NEXT:    xorl %eax, %ecx
; AVX2-NEXT:    xorb %ch, %cl
; AVX2-NEXT:    setnp %al
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: icmp_v32i16_v32i1:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
; AVX512F-NEXT:    vpcmpeqw %ymm2, %ymm3, %ymm2
; AVX512F-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    vpxor %ymm2, %ymm0, %ymm0
; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT:    kshiftrw $8, %k0, %k1
; AVX512F-NEXT:    kxorw %k1, %k0, %k0
; AVX512F-NEXT:    kshiftrw $4, %k0, %k1
; AVX512F-NEXT:    kxorw %k1, %k0, %k0
; AVX512F-NEXT:    kshiftrw $2, %k0, %k1
; AVX512F-NEXT:    kxorw %k1, %k0, %k0
; AVX512F-NEXT:    kshiftrw $1, %k0, %k1
; AVX512F-NEXT:    kxorw %k1, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: icmp_v32i16_v32i1:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    movl %eax, %ecx
; AVX512BW-NEXT:    shrl $16, %ecx
; AVX512BW-NEXT:    xorl %eax, %ecx
; AVX512BW-NEXT:    xorb %ch, %cl
; AVX512BW-NEXT:    setnp %al
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512VL-LABEL: icmp_v32i16_v32i1:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
; AVX512VL-NEXT:    kmovd %k0, %eax
; AVX512VL-NEXT:    movl %eax, %ecx
; AVX512VL-NEXT:    shrl $16, %ecx
; AVX512VL-NEXT:    xorl %eax, %ecx
; AVX512VL-NEXT:    xorb %ch, %cl
; AVX512VL-NEXT:    setnp %al
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
  %a = icmp eq <32 x i16> %0, %1
  %b = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> %a)
  ret i1 %b
}

; Lane-wise equality compare of two <64 x i8> vectors; the resulting
; <64 x i1> mask is XOR-reduced to a single i1, i.e. the parity of the
; number of equal lanes. The assertion lines inside the function are
; autogenerated and must be regenerated, not hand-edited.
define i1 @icmp_v64i8_v64i1(<64 x i8>, <64 x i8>) nounwind {
; X86-SSE2-LABEL: icmp_v64i8_v64i1:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %ebp
; X86-SSE2-NEXT:    movl %esp, %ebp
; X86-SSE2-NEXT:    andl $-16, %esp
; X86-SSE2-NEXT:    subl $16, %esp
; X86-SSE2-NEXT:    movdqa 8(%ebp), %xmm3
; X86-SSE2-NEXT:    pcmpeqb 56(%ebp), %xmm2
; X86-SSE2-NEXT:    pcmpeqb 24(%ebp), %xmm0
; X86-SSE2-NEXT:    pxor %xmm2, %xmm0
; X86-SSE2-NEXT:    pcmpeqb 72(%ebp), %xmm3
; X86-SSE2-NEXT:    pcmpeqb 40(%ebp), %xmm1
; X86-SSE2-NEXT:    pxor %xmm3, %xmm1
; X86-SSE2-NEXT:    pxor %xmm0, %xmm1
; X86-SSE2-NEXT:    pmovmskb %xmm1, %eax
; X86-SSE2-NEXT:    xorb %ah, %al
; X86-SSE2-NEXT:    setnp %al
; X86-SSE2-NEXT:    movl %ebp, %esp
; X86-SSE2-NEXT:    popl %ebp
; X86-SSE2-NEXT:    retl
;
; X64-SSE-LABEL: icmp_v64i8_v64i1:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    pcmpeqb %xmm6, %xmm2
; X64-SSE-NEXT:    pcmpeqb %xmm4, %xmm0
; X64-SSE-NEXT:    pxor %xmm2, %xmm0
; X64-SSE-NEXT:    pcmpeqb %xmm7, %xmm3
; X64-SSE-NEXT:    pcmpeqb %xmm5, %xmm1
; X64-SSE-NEXT:    pxor %xmm3, %xmm1
; X64-SSE-NEXT:    pxor %xmm0, %xmm1
; X64-SSE-NEXT:    pmovmskb %xmm1, %eax
; X64-SSE-NEXT:    xorb %ah, %al
; X64-SSE-NEXT:    setnp %al
; X64-SSE-NEXT:    retq
;
; AVX1-LABEL: icmp_v64i8_v64i1:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpeqb %xmm3, %xmm1, %xmm4
; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm5
; AVX1-NEXT:    vpxor %xmm4, %xmm5, %xmm4
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vpcmpeqb %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm0, %xmm4, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    xorb %ah, %al
; AVX1-NEXT:    setnp %al
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: icmp_v64i8_v64i1:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqb %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpxor %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpmovmskb %ymm0, %eax
; AVX2-NEXT:    movl %eax, %ecx
; AVX2-NEXT:    shrl $16, %ecx
; AVX2-NEXT:    xorl %eax, %ecx
; AVX2-NEXT:    xorb %ch, %cl
; AVX2-NEXT:    setnp %al
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: icmp_v64i8_v64i1:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
; AVX512F-NEXT:    vpcmpeqb %ymm2, %ymm3, %ymm2
; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    vextracti128 $1, %ymm2, %xmm1
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm3
; AVX512F-NEXT:    vpxor %xmm1, %xmm3, %xmm1
; AVX512F-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; AVX512F-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT:    kshiftrw $8, %k0, %k1
; AVX512F-NEXT:    kxorw %k1, %k0, %k0
; AVX512F-NEXT:    kshiftrw $4, %k0, %k1
; AVX512F-NEXT:    kxorw %k1, %k0, %k0
; AVX512F-NEXT:    kshiftrw $2, %k0, %k1
; AVX512F-NEXT:    kxorw %k1, %k0, %k0
; AVX512F-NEXT:    kshiftrw $1, %k0, %k1
; AVX512F-NEXT:    kxorw %k1, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: icmp_v64i8_v64i1:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0
; AVX512BW-NEXT:    kmovq %k0, %rax
; AVX512BW-NEXT:    movq %rax, %rcx
; AVX512BW-NEXT:    shrq $32, %rcx
; AVX512BW-NEXT:    xorl %eax, %ecx
; AVX512BW-NEXT:    movl %ecx, %eax
; AVX512BW-NEXT:    shrl $16, %eax
; AVX512BW-NEXT:    xorl %ecx, %eax
; AVX512BW-NEXT:    xorb %ah, %al
; AVX512BW-NEXT:    setnp %al
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512VL-LABEL: icmp_v64i8_v64i1:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0
; AVX512VL-NEXT:    kmovq %k0, %rax
; AVX512VL-NEXT:    movq %rax, %rcx
; AVX512VL-NEXT:    shrq $32, %rcx
; AVX512VL-NEXT:    xorl %eax, %ecx
; AVX512VL-NEXT:    movl %ecx, %eax
; AVX512VL-NEXT:    shrl $16, %eax
; AVX512VL-NEXT:    xorl %ecx, %eax
; AVX512VL-NEXT:    xorb %ah, %al
; AVX512VL-NEXT:    setnp %al
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
  %a = icmp eq <64 x i8> %0, %1
  %b = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> %a)
  ret i1 %b
}

; XOR-reduction intrinsic declarations, one per <N x i1> mask width
; (2 through 64 lanes).
declare i1 @llvm.vector.reduce.xor.v2i1(<2 x i1>)
declare i1 @llvm.vector.reduce.xor.v4i1(<4 x i1>)
declare i1 @llvm.vector.reduce.xor.v8i1(<8 x i1>)
declare i1 @llvm.vector.reduce.xor.v16i1(<16 x i1>)
declare i1 @llvm.vector.reduce.xor.v32i1(<32 x i1>)
declare i1 @llvm.vector.reduce.xor.v64i1(<64 x i1>)