; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2,X86-SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2,X64-SSE,X64-SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,X64-SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512VL

;
; Truncate
;

; Truncates <2 x i64> to <2 x i1> and AND-reduces the lanes into a single i1.
define i1 @trunc_v2i64_v2i1(<2 x i64>) nounwind {
; SSE2-LABEL: trunc_v2i64_v2i1:
; SSE2: # %bb.0:
; SSE2-NEXT: psllq $63, %xmm0
; SSE2-NEXT: movmskpd %xmm0, %eax
; SSE2-NEXT: cmpl $3, %eax
; SSE2-NEXT: sete %al
; SSE2-NEXT: ret{{[l|q]}}
;
; SSE41-LABEL: trunc_v2i64_v2i1:
; SSE41: # %bb.0:
; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: setb %al
; SSE41-NEXT: retq
;
; AVX1OR2-LABEL: trunc_v2i64_v2i1:
; AVX1OR2: # %bb.0:
; AVX1OR2-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; AVX1OR2-NEXT: setb %al
; AVX1OR2-NEXT: retq
;
; AVX512F-LABEL: trunc_v2i64_v2i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; AVX512F-NEXT: setb %al
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_v2i64_v2i1:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; AVX512BW-NEXT: setb %al
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: trunc_v2i64_v2i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [1,1]
; AVX512VL-NEXT: vptest %xmm1, %xmm0
; AVX512VL-NEXT: setb %al
; AVX512VL-NEXT: retq
  %a = trunc <2 x i64> %0 to <2 x i1>
  %b = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %a)
  ret i1 %b
}

; Truncates <4 x i32> to <4 x i1> and AND-reduces the lanes into a single i1.
define i1 @trunc_v4i32_v4i1(<4 x i32>) nounwind {
; SSE2-LABEL: trunc_v4i32_v4i1:
; SSE2: # %bb.0:
; SSE2-NEXT: pslld $31, %xmm0
; SSE2-NEXT: movmskps %xmm0, %eax
; SSE2-NEXT: xorl $15, %eax
; SSE2-NEXT: sete %al
; SSE2-NEXT: ret{{[l|q]}}
;
; SSE41-LABEL: trunc_v4i32_v4i1:
; SSE41: # %bb.0:
; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: setb %al
; SSE41-NEXT: retq
;
; AVX1OR2-LABEL: trunc_v4i32_v4i1:
; AVX1OR2: # %bb.0:
; AVX1OR2-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; AVX1OR2-NEXT: setb %al
; AVX1OR2-NEXT: retq
;
; AVX512F-LABEL: trunc_v4i32_v4i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; AVX512F-NEXT: setb %al
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_v4i32_v4i1:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; AVX512BW-NEXT: setb %al
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: trunc_v4i32_v4i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4294967297,4294967297]
; AVX512VL-NEXT: vptest %xmm1, %xmm0
; AVX512VL-NEXT: setb %al
; AVX512VL-NEXT: retq
  %a = trunc <4 x i32> %0 to <4 x i1>
  %b = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %a)
  ret i1 %b
}

; Truncates <8 x i16> to <8 x i1> and AND-reduces the lanes into a single i1.
define i1 @trunc_v8i16_v8i1(<8 x i16>) nounwind {
; SSE2-LABEL: trunc_v8i16_v8i1:
; SSE2: # %bb.0:
; SSE2-NEXT: psllw $7, %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %eax
; SSE2-NEXT: notl %eax
; SSE2-NEXT: testl $21845, %eax # imm = 0x5555
; SSE2-NEXT: sete %al
; SSE2-NEXT: ret{{[l|q]}}
;
; SSE41-LABEL: trunc_v8i16_v8i1:
; SSE41: # %bb.0:
; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: setb %al
; SSE41-NEXT: retq
;
; AVX1OR2-LABEL: trunc_v8i16_v8i1:
; AVX1OR2: # %bb.0:
; AVX1OR2-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; AVX1OR2-NEXT: setb %al
; AVX1OR2-NEXT: retq
;
; AVX512F-LABEL: trunc_v8i16_v8i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; AVX512F-NEXT: setb %al
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_v8i16_v8i1:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; AVX512BW-NEXT: setb %al
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: trunc_v8i16_v8i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [281479271743489,281479271743489]
; AVX512VL-NEXT: vptest %xmm1, %xmm0
; AVX512VL-NEXT: setb %al
; AVX512VL-NEXT: retq
  %a = trunc <8 x i16> %0 to <8 x i1>
  %b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a)
  ret i1 %b
}

; Truncates <16 x i8> to <16 x i1> and AND-reduces the lanes into a single i1.
define i1 @trunc_v16i8_v16i1(<16 x i8>) nounwind {
; SSE2-LABEL: trunc_v16i8_v16i1:
; SSE2: # %bb.0:
; SSE2-NEXT: psllw $7, %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %eax
; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT: sete %al
; SSE2-NEXT: ret{{[l|q]}}
;
; SSE41-LABEL: trunc_v16i8_v16i1:
; SSE41: # %bb.0:
; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: setb %al
; SSE41-NEXT: retq
;
; AVX1OR2-LABEL: trunc_v16i8_v16i1:
; AVX1OR2: # %bb.0:
; AVX1OR2-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; AVX1OR2-NEXT: setb %al
; AVX1OR2-NEXT: retq
;
; AVX512F-LABEL: trunc_v16i8_v16i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; AVX512F-NEXT: setb %al
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_v16i8_v16i1:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; AVX512BW-NEXT: setb %al
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: trunc_v16i8_v16i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [72340172838076673,72340172838076673]
; AVX512VL-NEXT: vptest %xmm1, %xmm0
; AVX512VL-NEXT: setb %al
; AVX512VL-NEXT: retq
  %a = trunc <16 x i8> %0 to <16 x i1>
  %b = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a)
  ret i1 %b
}

; Truncates <4 x i64> to <4 x i1> and AND-reduces the lanes into a single i1.
define i1 @trunc_v4i64_v4i1(<4 x i64>) nounwind {
; SSE2-LABEL: trunc_v4i64_v4i1:
; SSE2: # %bb.0:
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE2-NEXT: pslld $31, %xmm0
; SSE2-NEXT: movmskps %xmm0, %eax
; SSE2-NEXT: cmpl $15, %eax
; SSE2-NEXT: sete %al
; SSE2-NEXT: ret{{[l|q]}}
;
; SSE41-LABEL: trunc_v4i64_v4i1:
; SSE41: # %bb.0:
; SSE41-NEXT: pand %xmm1, %xmm0
; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: setb %al
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc_v4i64_v4i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
; AVX1-NEXT: setb %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_v4i64_v4i1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1]
; AVX2-NEXT: vptest %ymm1, %ymm0
; AVX2-NEXT: setb %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc_v4i64_v4i1:
; AVX512: # %bb.0:
; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1]
; AVX512-NEXT: vptest %ymm1, %ymm0
; AVX512-NEXT: setb %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %a = trunc <4 x i64> %0 to <4 x i1>
  %b = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %a)
  ret i1 %b
}

; Truncates <8 x i32> to <8 x i1> and AND-reduces the lanes into a single i1.
define i1 @trunc_v8i32_v8i1(<8 x i32>) nounwind {
; SSE2-LABEL: trunc_v8i32_v8i1:
; SSE2: # %bb.0:
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: pslld $31, %xmm0
; SSE2-NEXT: movmskps %xmm0, %eax
; SSE2-NEXT: xorl $15, %eax
; SSE2-NEXT: sete %al
; SSE2-NEXT: ret{{[l|q]}}
;
; SSE41-LABEL: trunc_v8i32_v8i1:
; SSE41: # %bb.0:
; SSE41-NEXT: pand %xmm1, %xmm0
; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: setb %al
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc_v8i32_v8i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
; AVX1-NEXT: setb %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_v8i32_v8i1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4294967297,4294967297,4294967297,4294967297]
; AVX2-NEXT: vptest %ymm1, %ymm0
; AVX2-NEXT: setb %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc_v8i32_v8i1:
; AVX512: # %bb.0:
; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4294967297,4294967297,4294967297,4294967297]
; AVX512-NEXT: vptest %ymm1, %ymm0
; AVX512-NEXT: setb %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %a = trunc <8 x i32> %0 to <8 x i1>
  %b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a)
  ret i1 %b
}

; Truncates <16 x i16> to <16 x i1> and AND-reduces the lanes into a single i1.
define i1 @trunc_v16i16_v16i1(<16 x i16>) nounwind {
; SSE2-LABEL: trunc_v16i16_v16i1:
; SSE2: # %bb.0:
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: psllw $7, %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %eax
; SSE2-NEXT: notl %eax
; SSE2-NEXT: testl $21845, %eax # imm = 0x5555
; SSE2-NEXT: sete %al
; SSE2-NEXT: ret{{[l|q]}}
;
; SSE41-LABEL: trunc_v16i16_v16i1:
; SSE41: # %bb.0:
; SSE41-NEXT: pand %xmm1, %xmm0
; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: setb %al
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc_v16i16_v16i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
; AVX1-NEXT: setb %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_v16i16_v16i1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [281479271743489,281479271743489,281479271743489,281479271743489]
; AVX2-NEXT: vptest %ymm1, %ymm0
; AVX2-NEXT: setb %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc_v16i16_v16i1:
; AVX512: # %bb.0:
; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [281479271743489,281479271743489,281479271743489,281479271743489]
; AVX512-NEXT: vptest %ymm1, %ymm0
; AVX512-NEXT: setb %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %a = trunc <16 x i16> %0 to <16 x i1>
  %b = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a)
  ret i1 %b
}

; Truncates <32 x i8> to <32 x i1> and AND-reduces the lanes into a single i1.
define i1 @trunc_v32i8_v32i1(<32 x i8>) nounwind {
; SSE2-LABEL: trunc_v32i8_v32i1:
; SSE2: # %bb.0:
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: psllw $7, %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %eax
; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT: sete %al
; SSE2-NEXT: ret{{[l|q]}}
;
; SSE41-LABEL: trunc_v32i8_v32i1:
; SSE41: # %bb.0:
; SSE41-NEXT: pand %xmm1, %xmm0
; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: setb %al
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc_v32i8_v32i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
; AVX1-NEXT: setb %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_v32i8_v32i1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [72340172838076673,72340172838076673,72340172838076673,72340172838076673]
; AVX2-NEXT: vptest %ymm1, %ymm0
; AVX2-NEXT: setb %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc_v32i8_v32i1:
; AVX512: # %bb.0:
; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [72340172838076673,72340172838076673,72340172838076673,72340172838076673]
; AVX512-NEXT: vptest %ymm1, %ymm0
; AVX512-NEXT: setb %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %a = trunc <32 x i8> %0 to <32 x i1>
  %b = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %a)
  ret i1 %b
}

; Truncates <8 x i64> to <8 x i1> and AND-reduces the lanes into a single i1.
define i1 @trunc_v8i64_v8i1(<8 x i64>) nounwind {
; X86-SSE2-LABEL: trunc_v8i64_v8i1:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %ebp
; X86-SSE2-NEXT: movl %esp, %ebp
; X86-SSE2-NEXT: andl $-16, %esp
; X86-SSE2-NEXT: subl $16, %esp
; X86-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X86-SSE2-NEXT: pslld $16, %xmm0
; X86-SSE2-NEXT: psrad $16, %xmm0
; X86-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],mem[0,2]
; X86-SSE2-NEXT: pslld $16, %xmm2
; X86-SSE2-NEXT: psrad $16, %xmm2
; X86-SSE2-NEXT: packssdw %xmm2, %xmm0
; X86-SSE2-NEXT: psllw $15, %xmm0
; X86-SSE2-NEXT: packsswb %xmm0, %xmm0
; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
; X86-SSE2-NEXT: cmpb $-1, %al
; X86-SSE2-NEXT: sete %al
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
;
; X64-SSE2-LABEL: trunc_v8i64_v8i1:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
; X64-SSE2-NEXT: pslld $16, %xmm2
; X64-SSE2-NEXT: psrad $16, %xmm2
; X64-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X64-SSE2-NEXT: pslld $16, %xmm0
; X64-SSE2-NEXT: psrad $16, %xmm0
; X64-SSE2-NEXT: packssdw %xmm2, %xmm0
; X64-SSE2-NEXT: psllw $15, %xmm0
; X64-SSE2-NEXT: packsswb %xmm0, %xmm0
; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
; X64-SSE2-NEXT: cmpb $-1, %al
; X64-SSE2-NEXT: sete %al
; X64-SSE2-NEXT: retq
;
; SSE41-LABEL: trunc_v8i64_v8i1:
; SSE41: # %bb.0:
; SSE41-NEXT: pand %xmm3, %xmm1
; SSE41-NEXT: pand %xmm2, %xmm0
; SSE41-NEXT: pand %xmm1, %xmm0
; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: setb %al
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc_v8i64_v8i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
; AVX1-NEXT: setb %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_v8i64_v8i1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1]
; AVX2-NEXT: vptest %ymm1, %ymm0
; AVX2-NEXT: setb %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc_v8i64_v8i1:
; AVX512: # %bb.0:
; AVX512-NEXT: vpbroadcastq {{.*#+}} zmm1 = [1,1,1,1,1,1,1,1]
; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
; AVX512-NEXT: kortestw %k0, %k0
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %a = trunc <8 x i64> %0 to <8 x i1>
  %b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a)
  ret i1 %b
}

; Truncates <16 x i32> to <16 x i1> and AND-reduces the lanes into a single i1.
define i1 @trunc_v16i32_v16i1(<16 x i32>) nounwind {
; X86-SSE2-LABEL: trunc_v16i32_v16i1:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %ebp
; X86-SSE2-NEXT: movl %esp, %ebp
; X86-SSE2-NEXT: andl $-16, %esp
; X86-SSE2-NEXT: subl $16, %esp
; X86-SSE2-NEXT: pand %xmm2, %xmm0
; X86-SSE2-NEXT: pand 8(%ebp), %xmm1
; X86-SSE2-NEXT: pand %xmm0, %xmm1
; X86-SSE2-NEXT: pslld $31, %xmm1
; X86-SSE2-NEXT: movmskps %xmm1, %eax
; X86-SSE2-NEXT: xorl $15, %eax
; X86-SSE2-NEXT: sete %al
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
;
; X64-SSE2-LABEL: trunc_v16i32_v16i1:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: pand %xmm3, %xmm1
; X64-SSE2-NEXT: pand %xmm2, %xmm0
; X64-SSE2-NEXT: pand %xmm1, %xmm0
; X64-SSE2-NEXT: pslld $31, %xmm0
; X64-SSE2-NEXT: movmskps %xmm0, %eax
; X64-SSE2-NEXT: xorl $15, %eax
; X64-SSE2-NEXT: sete %al
; X64-SSE2-NEXT: retq
;
; SSE41-LABEL: trunc_v16i32_v16i1:
; SSE41: # %bb.0:
; SSE41-NEXT: pand %xmm3, %xmm1
; SSE41-NEXT: pand %xmm2, %xmm0
; SSE41-NEXT: pand %xmm1, %xmm0
; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: setb %al
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc_v16i32_v16i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
; AVX1-NEXT: setb %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_v16i32_v16i1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4294967297,4294967297,4294967297,4294967297]
; AVX2-NEXT: vptest %ymm1, %ymm0
; AVX2-NEXT: setb %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc_v16i32_v16i1:
; AVX512: # %bb.0:
; AVX512-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
; AVX512-NEXT: kortestw %k0, %k0
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %a = trunc <16 x i32> %0 to <16 x i1>
  %b = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a)
  ret i1 %b
}

; Truncates <32 x i16> to <32 x i1> and AND-reduces the lanes into a single i1.
define i1 @trunc_v32i16_v32i1(<32 x i16>) nounwind {
; X86-SSE2-LABEL: trunc_v32i16_v32i1:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %ebp
; X86-SSE2-NEXT: movl %esp, %ebp
; X86-SSE2-NEXT: andl $-16, %esp
; X86-SSE2-NEXT: subl $16, %esp
; X86-SSE2-NEXT: pand %xmm2, %xmm0
; X86-SSE2-NEXT: pand 8(%ebp), %xmm1
; X86-SSE2-NEXT: pand %xmm0, %xmm1
; X86-SSE2-NEXT: psllw $7, %xmm1
; X86-SSE2-NEXT: pmovmskb %xmm1, %eax
; X86-SSE2-NEXT: notl %eax
; X86-SSE2-NEXT: testl $21845, %eax # imm = 0x5555
; X86-SSE2-NEXT: sete %al
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
;
; X64-SSE2-LABEL: trunc_v32i16_v32i1:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: pand %xmm3, %xmm1
; X64-SSE2-NEXT: pand %xmm2, %xmm0
; X64-SSE2-NEXT: pand %xmm1, %xmm0
; X64-SSE2-NEXT: psllw $7, %xmm0
; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
; X64-SSE2-NEXT: notl %eax
; X64-SSE2-NEXT: testl $21845, %eax # imm = 0x5555
; X64-SSE2-NEXT: sete %al
; X64-SSE2-NEXT: retq
;
; SSE41-LABEL: trunc_v32i16_v32i1:
; SSE41: # %bb.0:
; SSE41-NEXT: pand %xmm3, %xmm1
; SSE41-NEXT: pand %xmm2, %xmm0
; SSE41-NEXT: pand %xmm1, %xmm0
; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: setb %al
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc_v32i16_v32i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
; AVX1-NEXT: setb %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_v32i16_v32i1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [281479271743489,281479271743489,281479271743489,281479271743489]
; AVX2-NEXT: vptest %ymm1, %ymm0
; AVX2-NEXT: setb %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc_v32i16_v32i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpbroadcastd {{.*#+}} zmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX512F-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
; AVX512F-NEXT: kortestw %k0, %k0
; AVX512F-NEXT: sete %al
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_v32i16_v32i1:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kortestw %k0, %k0
; AVX512BW-NEXT: sete %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: trunc_v32i16_v32i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpbroadcastw {{.*#+}} zmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX512VL-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512VL-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
; AVX512VL-NEXT: kortestw %k0, %k0
; AVX512VL-NEXT: sete %al
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
  %a = trunc <32 x i16> %0 to <32 x i1>
  %b = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %a)
  ret i1 %b
}

; Truncates <64 x i8> to <64 x i1> and AND-reduces the lanes into a single i1.
define i1 @trunc_v64i8_v64i1(<64 x i8>) nounwind {
; X86-SSE2-LABEL: trunc_v64i8_v64i1:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %ebp
; X86-SSE2-NEXT: movl %esp, %ebp
; X86-SSE2-NEXT: andl $-16, %esp
; X86-SSE2-NEXT: subl $16, %esp
; X86-SSE2-NEXT: pand %xmm2, %xmm0
; X86-SSE2-NEXT: pand 8(%ebp), %xmm1
; X86-SSE2-NEXT: pand %xmm0, %xmm1
; X86-SSE2-NEXT: psllw $7, %xmm1
; X86-SSE2-NEXT: pmovmskb %xmm1, %eax
; X86-SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF
; X86-SSE2-NEXT: sete %al
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
;
; X64-SSE2-LABEL: trunc_v64i8_v64i1:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: pand %xmm3, %xmm1
; X64-SSE2-NEXT: pand %xmm2, %xmm0
; X64-SSE2-NEXT: pand %xmm1, %xmm0
; X64-SSE2-NEXT: psllw $7, %xmm0
; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
; X64-SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF
; X64-SSE2-NEXT: sete %al
; X64-SSE2-NEXT: retq
;
; SSE41-LABEL: trunc_v64i8_v64i1:
; SSE41: # %bb.0:
; SSE41-NEXT: pand %xmm3, %xmm1
; SSE41-NEXT: pand %xmm2, %xmm0
; SSE41-NEXT: pand %xmm1, %xmm0
; SSE41-NEXT: ptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: setb %al
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc_v64i8_v64i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
; AVX1-NEXT: setb %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_v64i8_v64i1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [72340172838076673,72340172838076673,72340172838076673,72340172838076673]
; AVX2-NEXT: vptest %ymm1, %ymm0
; AVX2-NEXT: setb %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc_v64i8_v64i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpbroadcastd {{.*#+}} zmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX512F-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
; AVX512F-NEXT: kortestw %k0, %k0
; AVX512F-NEXT: sete %al
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_v64i8_v64i1:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpbroadcastb {{.*#+}} zmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kortestw %k0, %k0
; AVX512BW-NEXT: sete %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: trunc_v64i8_v64i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpbroadcastb {{.*#+}} zmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX512VL-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512VL-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
; AVX512VL-NEXT: kortestw %k0, %k0
; AVX512VL-NEXT: sete %al
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
  %a = trunc <64 x i8> %0 to <64 x i1>
  %b = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> %a)
  ret i1 %b
}

;
; Comparison With Zero
;

; Compares each lane of <2 x i64> for equality with zero and AND-reduces the <2 x i1> result.
define i1 @icmp0_v2i64_v2i1(<2 x i64>) nounwind {
; SSE2-LABEL: icmp0_v2i64_v2i1:
; SSE2: # %bb.0:
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
; SSE2-NEXT: movmskps %xmm1, %eax
; SSE2-NEXT: xorl $15, %eax
; SSE2-NEXT: sete %al
; SSE2-NEXT: ret{{[l|q]}}
;
; SSE41-LABEL: icmp0_v2i64_v2i1:
; SSE41: # %bb.0:
; SSE41-NEXT: ptest %xmm0, %xmm0
; SSE41-NEXT: sete %al
; SSE41-NEXT: retq
;
; AVX-LABEL: icmp0_v2i64_v2i1:
; AVX: # %bb.0:
; AVX-NEXT: vptest %xmm0, %xmm0
; AVX-NEXT: sete %al
; AVX-NEXT: retq
  %a = icmp eq <2 x i64> %0, zeroinitializer
  %b = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %a)
  ret i1 %b
}

; Compares each lane of <4 x i32> for equality with zero and AND-reduces the <4 x i1> result.
define i1 @icmp0_v4i32_v4i1(<4 x i32>) nounwind {
; SSE2-LABEL: icmp0_v4i32_v4i1:
; SSE2: # %bb.0:
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
; SSE2-NEXT: movmskps %xmm1, %eax
; SSE2-NEXT: xorl $15, %eax
; SSE2-NEXT: sete %al
; SSE2-NEXT: ret{{[l|q]}}
;
; SSE41-LABEL: icmp0_v4i32_v4i1:
; SSE41: # %bb.0:
; SSE41-NEXT: ptest %xmm0, %xmm0
; SSE41-NEXT: sete %al
; SSE41-NEXT: retq
;
; AVX-LABEL: icmp0_v4i32_v4i1:
; AVX: # %bb.0:
; AVX-NEXT: vptest %xmm0, %xmm0
; AVX-NEXT: sete %al
; AVX-NEXT: retq
  %a = icmp eq <4 x i32> %0, zeroinitializer
  %b = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %a)
  ret i1 %b
}

; Compares each lane of <8 x i16> for equality with zero and AND-reduces the <8 x i1> result.
define i1 @icmp0_v8i16_v8i1(<8 x i16>) nounwind {
; SSE2-LABEL: icmp0_v8i16_v8i1:
; SSE2: # %bb.0:
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; SSE2-NEXT: pmovmskb %xmm1, %eax
; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT: sete %al
; SSE2-NEXT: ret{{[l|q]}}
;
; SSE41-LABEL: icmp0_v8i16_v8i1:
; SSE41: # %bb.0:
; SSE41-NEXT: ptest %xmm0, %xmm0
; SSE41-NEXT: sete %al
; SSE41-NEXT: retq
;
; AVX-LABEL: icmp0_v8i16_v8i1:
; AVX: # %bb.0:
; AVX-NEXT: vptest %xmm0, %xmm0
; AVX-NEXT: sete %al
; AVX-NEXT: retq
  %a = icmp eq <8 x i16> %0, zeroinitializer
  %b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a)
  ret i1 %b
}

; Compares each lane of <16 x i8> for equality with zero and AND-reduces the <16 x i1> result.
define i1 @icmp0_v16i8_v16i1(<16 x i8>) nounwind {
; SSE2-LABEL: icmp0_v16i8_v16i1:
; SSE2: # %bb.0:
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; SSE2-NEXT: pmovmskb %xmm1, %eax
; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT: sete %al
; SSE2-NEXT: ret{{[l|q]}}
;
; SSE41-LABEL: icmp0_v16i8_v16i1:
; SSE41: # %bb.0:
; SSE41-NEXT: ptest %xmm0, %xmm0
; SSE41-NEXT: sete %al
; SSE41-NEXT: retq
;
; AVX-LABEL: icmp0_v16i8_v16i1:
; AVX: # %bb.0:
; AVX-NEXT: vptest %xmm0, %xmm0
; AVX-NEXT: sete %al
; AVX-NEXT: retq
  %a = icmp eq <16 x i8> %0, zeroinitializer
  %b = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a)
  ret i1 %b
}

; Compares each lane of <4 x i64> for equality with zero and AND-reduces the <4 x i1> result.
define i1 @icmp0_v4i64_v4i1(<4 x i64>) nounwind {
; SSE2-LABEL: icmp0_v4i64_v4i1:
; SSE2: # %bb.0:
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
; SSE2-NEXT: movmskps %xmm1, %eax
; SSE2-NEXT: xorl $15, %eax
; SSE2-NEXT: sete %al
; SSE2-NEXT: ret{{[l|q]}}
;
; SSE41-LABEL: icmp0_v4i64_v4i1:
; SSE41: # %bb.0:
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: ptest %xmm0, %xmm0
; SSE41-NEXT: sete %al
; SSE41-NEXT: retq
;
; AVX-LABEL: icmp0_v4i64_v4i1:
; AVX: # %bb.0:
; AVX-NEXT: vptest %ymm0, %ymm0
; AVX-NEXT: sete %al
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
  %a = icmp eq <4 x i64> %0, zeroinitializer
  %b = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %a)
  ret i1 %b
}

; Compares each lane of <8 x i32> for equality with zero and AND-reduces the <8 x i1> result.
define i1 @icmp0_v8i32_v8i1(<8 x i32>) nounwind {
; SSE2-LABEL: icmp0_v8i32_v8i1:
; SSE2: # %bb.0:
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
; SSE2-NEXT: movmskps %xmm1, %eax
; SSE2-NEXT: xorl $15, %eax
; SSE2-NEXT: sete %al
; SSE2-NEXT: ret{{[l|q]}}
;
; SSE41-LABEL: icmp0_v8i32_v8i1:
; SSE41: # %bb.0:
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: ptest %xmm0, %xmm0
; SSE41-NEXT: sete %al
; SSE41-NEXT: retq
;
; AVX-LABEL: icmp0_v8i32_v8i1:
; AVX: # %bb.0:
; AVX-NEXT: vptest %ymm0, %ymm0
; AVX-NEXT: sete %al
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
  %a = icmp eq <8 x i32> %0, zeroinitializer
  %b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a)
  ret i1 %b
}

; Compares each lane of <16 x i16> for equality with zero and AND-reduces the <16 x i1> result.
define i1 @icmp0_v16i16_v16i1(<16 x i16>) nounwind {
; SSE2-LABEL: icmp0_v16i16_v16i1:
; SSE2: # %bb.0:
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; SSE2-NEXT: pmovmskb %xmm1, %eax
; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT: sete %al
; SSE2-NEXT: ret{{[l|q]}}
;
; SSE41-LABEL: icmp0_v16i16_v16i1:
; SSE41: # %bb.0:
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: ptest %xmm0, %xmm0
; SSE41-NEXT: sete %al
; SSE41-NEXT: retq
;
; AVX-LABEL: icmp0_v16i16_v16i1:
; AVX: # %bb.0:
; AVX-NEXT: vptest %ymm0, %ymm0
; AVX-NEXT: sete %al
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
  %a = icmp eq <16 x i16> %0, zeroinitializer
  %b = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a)
  ret i1 %b
}

; Compares each lane of <32 x i8> for equality with zero and AND-reduces the <32 x i1> result.
define i1 @icmp0_v32i8_v32i1(<32 x i8>) nounwind {
; SSE2-LABEL: icmp0_v32i8_v32i1:
; SSE2: # %bb.0:
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; SSE2-NEXT: pmovmskb %xmm1, %eax
; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT: sete %al
; SSE2-NEXT: ret{{[l|q]}}
;
; SSE41-LABEL: icmp0_v32i8_v32i1:
; SSE41: # %bb.0:
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: ptest %xmm0, %xmm0
; SSE41-NEXT: sete %al
; SSE41-NEXT: retq
;
; AVX-LABEL: icmp0_v32i8_v32i1:
; AVX: # %bb.0:
; AVX-NEXT: vptest %ymm0, %ymm0
; AVX-NEXT: sete %al
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
  %a = icmp eq <32 x i8> %0, zeroinitializer
  %b = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %a)
  ret i1 %b
}

; Compares each lane of <8 x i64> for equality with zero and AND-reduces the <8 x i1> result.
define i1 @icmp0_v8i64_v8i1(<8 x i64>) nounwind {
; X86-SSE2-LABEL: icmp0_v8i64_v8i1:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %ebp
; X86-SSE2-NEXT: movl %esp, %ebp
; X86-SSE2-NEXT: andl $-16, %esp
; X86-SSE2-NEXT: subl $16, %esp
; X86-SSE2-NEXT: por %xmm2, %xmm0
; X86-SSE2-NEXT: por 8(%ebp), %xmm1
; X86-SSE2-NEXT: por %xmm0, %xmm1
; X86-SSE2-NEXT: pxor %xmm0, %xmm0
; X86-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
; X86-SSE2-NEXT: movmskps %xmm0, %eax
; X86-SSE2-NEXT: xorl $15, %eax
; X86-SSE2-NEXT: sete %al
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
;
; X64-SSE2-LABEL: icmp0_v8i64_v8i1:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: por %xmm3, %xmm1
; X64-SSE2-NEXT: por %xmm2, %xmm0
; X64-SSE2-NEXT: por %xmm1, %xmm0
; X64-SSE2-NEXT: pxor %xmm1, %xmm1
; X64-SSE2-NEXT: pcmpeqd %xmm0, %xmm1
; X64-SSE2-NEXT: movmskps %xmm1, %eax
; X64-SSE2-NEXT: xorl $15, %eax
; X64-SSE2-NEXT: sete %al
; X64-SSE2-NEXT: retq
;
; SSE41-LABEL: icmp0_v8i64_v8i1:
; SSE41: # %bb.0:
; SSE41-NEXT: por %xmm3, %xmm1
; SSE41-NEXT: por %xmm2, %xmm0
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: ptest %xmm0, %xmm0
; SSE41-NEXT: sete %al
; SSE41-NEXT: retq
;
; AVX1-LABEL: icmp0_v8i64_v8i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vptest %ymm0, %ymm0
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: icmp0_v8i64_v8i1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vptest %ymm0, %ymm0
; AVX2-NEXT: sete %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: icmp0_v8i64_v8i1:
; AVX512: # %bb.0:
; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512-NEXT: kortestw %k0, %k0
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %a = icmp eq <8 x i64> %0, zeroinitializer
  %b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a)
  ret i1 %b
}

; Compares each lane of <16 x i32> for equality with zero and AND-reduces the <16 x i1> result.
define i1 @icmp0_v16i32_v16i1(<16 x i32>) nounwind {
; X86-SSE2-LABEL: icmp0_v16i32_v16i1:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %ebp
; X86-SSE2-NEXT: movl %esp, %ebp
; X86-SSE2-NEXT: andl $-16, %esp
; X86-SSE2-NEXT: subl $16, %esp
; X86-SSE2-NEXT: por %xmm2, %xmm0
; X86-SSE2-NEXT: por 8(%ebp), %xmm1
; X86-SSE2-NEXT: por %xmm0, %xmm1
; X86-SSE2-NEXT: pxor %xmm0, %xmm0
; X86-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
; X86-SSE2-NEXT: movmskps %xmm0, %eax
; X86-SSE2-NEXT: xorl $15, %eax
; X86-SSE2-NEXT: sete %al
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
;
; X64-SSE2-LABEL: icmp0_v16i32_v16i1:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: por %xmm3, %xmm1
; X64-SSE2-NEXT: por %xmm2, %xmm0
; X64-SSE2-NEXT: por %xmm1, %xmm0
; X64-SSE2-NEXT: pxor %xmm1, %xmm1
; X64-SSE2-NEXT: pcmpeqd %xmm0, %xmm1
; X64-SSE2-NEXT: movmskps %xmm1, %eax
; X64-SSE2-NEXT: xorl $15, %eax
; X64-SSE2-NEXT: sete %al
; X64-SSE2-NEXT: retq
;
; SSE41-LABEL: icmp0_v16i32_v16i1:
; SSE41: # %bb.0:
; SSE41-NEXT: por %xmm3, %xmm1
; SSE41-NEXT: por %xmm2, %xmm0
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: ptest %xmm0, %xmm0
; SSE41-NEXT: sete %al
; SSE41-NEXT: retq
;
; AVX1-LABEL: icmp0_v16i32_v16i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vptest %ymm0, %ymm0
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: icmp0_v16i32_v16i1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vptest %ymm0, %ymm0
; AVX2-NEXT: sete %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: icmp0_v16i32_v16i1:
; AVX512: # %bb.0:
; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512-NEXT: kortestw %k0, %k0
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %a = icmp eq <16 x i32> %0, zeroinitializer
  %b = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a)
  ret i1 %b
}

; Compares each lane of <32 x i16> for equality with zero and AND-reduces the <32 x i1> result.
define i1 @icmp0_v32i16_v32i1(<32 x i16>) nounwind {
; X86-SSE2-LABEL: icmp0_v32i16_v32i1:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %ebp
; X86-SSE2-NEXT: movl %esp, %ebp
; X86-SSE2-NEXT: andl $-16, %esp
; X86-SSE2-NEXT: subl $16, %esp
; X86-SSE2-NEXT: por %xmm2, %xmm0
; X86-SSE2-NEXT: por 8(%ebp), %xmm1
; X86-SSE2-NEXT: por %xmm0, %xmm1
; X86-SSE2-NEXT: pxor %xmm0, %xmm0
; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
; X86-SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF
; X86-SSE2-NEXT: sete %al
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
;
; X64-SSE2-LABEL: icmp0_v32i16_v32i1:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: por %xmm3, %xmm1
; X64-SSE2-NEXT: por %xmm2, %xmm0
; X64-SSE2-NEXT: por %xmm1, %xmm0
; X64-SSE2-NEXT: pxor %xmm1, %xmm1
; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; X64-SSE2-NEXT: pmovmskb %xmm1, %eax
; X64-SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF
; X64-SSE2-NEXT: sete %al
; X64-SSE2-NEXT: retq
;
; SSE41-LABEL: icmp0_v32i16_v32i1:
; SSE41: # %bb.0:
; SSE41-NEXT: por %xmm3, %xmm1
; SSE41-NEXT: por %xmm2, %xmm0
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: ptest %xmm0, %xmm0
; SSE41-NEXT: sete %al
; SSE41-NEXT: retq
;
; AVX1-LABEL: icmp0_v32i16_v32i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vptest %ymm0, %ymm0
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: icmp0_v32i16_v32i1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vptest %ymm0, %ymm0
; AVX2-NEXT: sete %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: icmp0_v32i16_v32i1:
; AVX512: # %bb.0:
; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512-NEXT: kortestw %k0, %k0
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %a = icmp eq <32 x i16> %0, zeroinitializer
  %b = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %a)
  ret i1 %b
}

define i1 @icmp0_v64i8_v64i1(<64 x i8>) nounwind {
; X86-SSE2-LABEL: icmp0_v64i8_v64i1:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %ebp
; X86-SSE2-NEXT: movl %esp, %ebp
; X86-SSE2-NEXT: andl $-16, %esp
; X86-SSE2-NEXT: subl $16, %esp
; X86-SSE2-NEXT: por %xmm2, %xmm0
; X86-SSE2-NEXT: por 8(%ebp), %xmm1
; X86-SSE2-NEXT: por %xmm0, %xmm1
; X86-SSE2-NEXT: pxor %xmm0, %xmm0
; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
; X86-SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF
; X86-SSE2-NEXT: sete %al
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
;
; X64-SSE2-LABEL: icmp0_v64i8_v64i1:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: por %xmm3, %xmm1
; X64-SSE2-NEXT: por %xmm2, %xmm0
; X64-SSE2-NEXT: por %xmm1, %xmm0
; X64-SSE2-NEXT: pxor %xmm1, %xmm1
; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; X64-SSE2-NEXT: pmovmskb %xmm1, %eax
; X64-SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF
; X64-SSE2-NEXT: sete %al
; X64-SSE2-NEXT: retq
;
; SSE41-LABEL: icmp0_v64i8_v64i1:
; SSE41: # %bb.0:
; SSE41-NEXT: por %xmm3, %xmm1
; SSE41-NEXT: por %xmm2, %xmm0
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: ptest %xmm0, %xmm0
; SSE41-NEXT: sete %al
; SSE41-NEXT: retq
;
; AVX1-LABEL: icmp0_v64i8_v64i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vptest %ymm0, %ymm0
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: icmp0_v64i8_v64i1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vptest %ymm0, %ymm0
; AVX2-NEXT: sete %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: icmp0_v64i8_v64i1:
; AVX512: # %bb.0:
; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512-NEXT:
kortestw %k0, %k0 1183; AVX512-NEXT: sete %al 1184; AVX512-NEXT: vzeroupper 1185; AVX512-NEXT: retq 1186 %a = icmp eq <64 x i8> %0, zeroinitializer 1187 %b = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> %a) 1188 ret i1 %b 1189} 1190 1191define i8 @icmp0_v8i1(<8 x i8>) nounwind { 1192; SSE2-LABEL: icmp0_v8i1: 1193; SSE2: # %bb.0: 1194; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1195; SSE2-NEXT: psllw $15, %xmm0 1196; SSE2-NEXT: pmovmskb %xmm0, %eax 1197; SSE2-NEXT: testl $43690, %eax # imm = 0xAAAA 1198; SSE2-NEXT: sete %al 1199; SSE2-NEXT: ret{{[l|q]}} 1200; 1201; SSE41-LABEL: icmp0_v8i1: 1202; SSE41: # %bb.0: 1203; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1204; SSE41-NEXT: psllw $15, %xmm0 1205; SSE41-NEXT: pmovmskb %xmm0, %eax 1206; SSE41-NEXT: testl $43690, %eax # imm = 0xAAAA 1207; SSE41-NEXT: sete %al 1208; SSE41-NEXT: retq 1209; 1210; AVX1OR2-LABEL: icmp0_v8i1: 1211; AVX1OR2: # %bb.0: 1212; AVX1OR2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1213; AVX1OR2-NEXT: vpsllw $15, %xmm0, %xmm0 1214; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax 1215; AVX1OR2-NEXT: testl $43690, %eax # imm = 0xAAAA 1216; AVX1OR2-NEXT: sete %al 1217; AVX1OR2-NEXT: retq 1218; 1219; AVX512F-LABEL: icmp0_v8i1: 1220; AVX512F: # %bb.0: 1221; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 1222; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 1223; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 1224; AVX512F-NEXT: kmovw %k0, %eax 1225; AVX512F-NEXT: testb %al, %al 1226; AVX512F-NEXT: sete %al 1227; AVX512F-NEXT: vzeroupper 1228; AVX512F-NEXT: retq 1229; 1230; AVX512BW-LABEL: icmp0_v8i1: 1231; AVX512BW: # %bb.0: 1232; AVX512BW-NEXT: vpsllw $7, %xmm0, %xmm0 1233; AVX512BW-NEXT: vpmovb2m %zmm0, %k0 1234; AVX512BW-NEXT: kmovd %k0, %eax 1235; AVX512BW-NEXT: testb %al, %al 1236; AVX512BW-NEXT: sete %al 1237; 
AVX512BW-NEXT: vzeroupper 1238; AVX512BW-NEXT: retq 1239; 1240; AVX512VL-LABEL: icmp0_v8i1: 1241; AVX512VL: # %bb.0: 1242; AVX512VL-NEXT: vpsllw $7, %xmm0, %xmm0 1243; AVX512VL-NEXT: vpmovb2m %xmm0, %k0 1244; AVX512VL-NEXT: kmovd %k0, %eax 1245; AVX512VL-NEXT: testb %al, %al 1246; AVX512VL-NEXT: sete %al 1247; AVX512VL-NEXT: retq 1248 %a = trunc <8 x i8> %0 to <8 x i1> 1249 %b = icmp eq <8 x i1> %a, zeroinitializer 1250 %c = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %b) 1251 %d = zext i1 %c to i8 1252 ret i8 %d 1253} 1254 1255; 1256; Comparison With All Ones 1257; 1258 1259define i1 @icmp1_v2i64_v2i1(<2 x i64>) nounwind { 1260; SSE2-LABEL: icmp1_v2i64_v2i1: 1261; SSE2: # %bb.0: 1262; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 1263; SSE2-NEXT: pcmpeqd %xmm0, %xmm1 1264; SSE2-NEXT: movmskps %xmm1, %eax 1265; SSE2-NEXT: xorl $15, %eax 1266; SSE2-NEXT: sete %al 1267; SSE2-NEXT: ret{{[l|q]}} 1268; 1269; SSE41-LABEL: icmp1_v2i64_v2i1: 1270; SSE41: # %bb.0: 1271; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 1272; SSE41-NEXT: ptest %xmm1, %xmm0 1273; SSE41-NEXT: setb %al 1274; SSE41-NEXT: retq 1275; 1276; AVX-LABEL: icmp1_v2i64_v2i1: 1277; AVX: # %bb.0: 1278; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 1279; AVX-NEXT: vptest %xmm1, %xmm0 1280; AVX-NEXT: setb %al 1281; AVX-NEXT: retq 1282 %a = icmp eq <2 x i64> %0, <i64 -1, i64 -1> 1283 %b = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %a) 1284 ret i1 %b 1285} 1286 1287define i1 @icmp1_v4i32_v4i1(<4 x i32>) nounwind { 1288; SSE2-LABEL: icmp1_v4i32_v4i1: 1289; SSE2: # %bb.0: 1290; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 1291; SSE2-NEXT: pcmpeqd %xmm0, %xmm1 1292; SSE2-NEXT: movmskps %xmm1, %eax 1293; SSE2-NEXT: xorl $15, %eax 1294; SSE2-NEXT: sete %al 1295; SSE2-NEXT: ret{{[l|q]}} 1296; 1297; SSE41-LABEL: icmp1_v4i32_v4i1: 1298; SSE41: # %bb.0: 1299; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 1300; SSE41-NEXT: ptest %xmm1, %xmm0 1301; SSE41-NEXT: setb %al 1302; SSE41-NEXT: retq 1303; 1304; AVX-LABEL: icmp1_v4i32_v4i1: 1305; AVX: # %bb.0: 1306; AVX-NEXT: vpcmpeqd 
%xmm1, %xmm1, %xmm1 1307; AVX-NEXT: vptest %xmm1, %xmm0 1308; AVX-NEXT: setb %al 1309; AVX-NEXT: retq 1310 %a = icmp eq <4 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1> 1311 %b = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %a) 1312 ret i1 %b 1313} 1314 1315define i1 @icmp1_v8i16_v8i1(<8 x i16>) nounwind { 1316; SSE2-LABEL: icmp1_v8i16_v8i1: 1317; SSE2: # %bb.0: 1318; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 1319; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 1320; SSE2-NEXT: pmovmskb %xmm1, %eax 1321; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF 1322; SSE2-NEXT: sete %al 1323; SSE2-NEXT: ret{{[l|q]}} 1324; 1325; SSE41-LABEL: icmp1_v8i16_v8i1: 1326; SSE41: # %bb.0: 1327; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 1328; SSE41-NEXT: ptest %xmm1, %xmm0 1329; SSE41-NEXT: setb %al 1330; SSE41-NEXT: retq 1331; 1332; AVX-LABEL: icmp1_v8i16_v8i1: 1333; AVX: # %bb.0: 1334; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 1335; AVX-NEXT: vptest %xmm1, %xmm0 1336; AVX-NEXT: setb %al 1337; AVX-NEXT: retq 1338 %a = icmp eq <8 x i16> %0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> 1339 %b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a) 1340 ret i1 %b 1341} 1342 1343define i1 @icmp1_v16i8_v16i1(<16 x i8>) nounwind { 1344; SSE2-LABEL: icmp1_v16i8_v16i1: 1345; SSE2: # %bb.0: 1346; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 1347; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 1348; SSE2-NEXT: pmovmskb %xmm1, %eax 1349; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF 1350; SSE2-NEXT: sete %al 1351; SSE2-NEXT: ret{{[l|q]}} 1352; 1353; SSE41-LABEL: icmp1_v16i8_v16i1: 1354; SSE41: # %bb.0: 1355; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 1356; SSE41-NEXT: ptest %xmm1, %xmm0 1357; SSE41-NEXT: setb %al 1358; SSE41-NEXT: retq 1359; 1360; AVX-LABEL: icmp1_v16i8_v16i1: 1361; AVX: # %bb.0: 1362; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 1363; AVX-NEXT: vptest %xmm1, %xmm0 1364; AVX-NEXT: setb %al 1365; AVX-NEXT: retq 1366 %a = icmp eq <16 x i8> %0, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, 
i8 -1> 1367 %b = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a) 1368 ret i1 %b 1369} 1370 1371define i1 @icmp1_v4i64_v4i1(<4 x i64>) nounwind { 1372; SSE2-LABEL: icmp1_v4i64_v4i1: 1373; SSE2: # %bb.0: 1374; SSE2-NEXT: pand %xmm1, %xmm0 1375; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 1376; SSE2-NEXT: pcmpeqd %xmm0, %xmm1 1377; SSE2-NEXT: movmskps %xmm1, %eax 1378; SSE2-NEXT: xorl $15, %eax 1379; SSE2-NEXT: sete %al 1380; SSE2-NEXT: ret{{[l|q]}} 1381; 1382; SSE41-LABEL: icmp1_v4i64_v4i1: 1383; SSE41: # %bb.0: 1384; SSE41-NEXT: pand %xmm1, %xmm0 1385; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 1386; SSE41-NEXT: ptest %xmm1, %xmm0 1387; SSE41-NEXT: setb %al 1388; SSE41-NEXT: retq 1389; 1390; AVX1-LABEL: icmp1_v4i64_v4i1: 1391; AVX1: # %bb.0: 1392; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 1393; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1 1394; AVX1-NEXT: vptest %ymm1, %ymm0 1395; AVX1-NEXT: setb %al 1396; AVX1-NEXT: vzeroupper 1397; AVX1-NEXT: retq 1398; 1399; AVX2-LABEL: icmp1_v4i64_v4i1: 1400; AVX2: # %bb.0: 1401; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 1402; AVX2-NEXT: vptest %ymm1, %ymm0 1403; AVX2-NEXT: setb %al 1404; AVX2-NEXT: vzeroupper 1405; AVX2-NEXT: retq 1406; 1407; AVX512-LABEL: icmp1_v4i64_v4i1: 1408; AVX512: # %bb.0: 1409; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 1410; AVX512-NEXT: vptest %ymm1, %ymm0 1411; AVX512-NEXT: setb %al 1412; AVX512-NEXT: vzeroupper 1413; AVX512-NEXT: retq 1414 %a = icmp eq <4 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1> 1415 %b = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %a) 1416 ret i1 %b 1417} 1418 1419define i1 @icmp1_v8i32_v8i1(<8 x i32>) nounwind { 1420; SSE2-LABEL: icmp1_v8i32_v8i1: 1421; SSE2: # %bb.0: 1422; SSE2-NEXT: pand %xmm1, %xmm0 1423; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 1424; SSE2-NEXT: pcmpeqd %xmm0, %xmm1 1425; SSE2-NEXT: movmskps %xmm1, %eax 1426; SSE2-NEXT: xorl $15, %eax 1427; SSE2-NEXT: sete %al 1428; SSE2-NEXT: ret{{[l|q]}} 1429; 1430; SSE41-LABEL: icmp1_v8i32_v8i1: 1431; SSE41: # %bb.0: 1432; SSE41-NEXT: pand %xmm1, %xmm0 
1433; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 1434; SSE41-NEXT: ptest %xmm1, %xmm0 1435; SSE41-NEXT: setb %al 1436; SSE41-NEXT: retq 1437; 1438; AVX1-LABEL: icmp1_v8i32_v8i1: 1439; AVX1: # %bb.0: 1440; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 1441; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1 1442; AVX1-NEXT: vptest %ymm1, %ymm0 1443; AVX1-NEXT: setb %al 1444; AVX1-NEXT: vzeroupper 1445; AVX1-NEXT: retq 1446; 1447; AVX2-LABEL: icmp1_v8i32_v8i1: 1448; AVX2: # %bb.0: 1449; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 1450; AVX2-NEXT: vptest %ymm1, %ymm0 1451; AVX2-NEXT: setb %al 1452; AVX2-NEXT: vzeroupper 1453; AVX2-NEXT: retq 1454; 1455; AVX512-LABEL: icmp1_v8i32_v8i1: 1456; AVX512: # %bb.0: 1457; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 1458; AVX512-NEXT: vptest %ymm1, %ymm0 1459; AVX512-NEXT: setb %al 1460; AVX512-NEXT: vzeroupper 1461; AVX512-NEXT: retq 1462 %a = icmp eq <8 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> 1463 %b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a) 1464 ret i1 %b 1465} 1466 1467define i1 @icmp1_v16i16_v16i1(<16 x i16>) nounwind { 1468; SSE2-LABEL: icmp1_v16i16_v16i1: 1469; SSE2: # %bb.0: 1470; SSE2-NEXT: pand %xmm1, %xmm0 1471; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 1472; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 1473; SSE2-NEXT: pmovmskb %xmm1, %eax 1474; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF 1475; SSE2-NEXT: sete %al 1476; SSE2-NEXT: ret{{[l|q]}} 1477; 1478; SSE41-LABEL: icmp1_v16i16_v16i1: 1479; SSE41: # %bb.0: 1480; SSE41-NEXT: pand %xmm1, %xmm0 1481; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 1482; SSE41-NEXT: ptest %xmm1, %xmm0 1483; SSE41-NEXT: setb %al 1484; SSE41-NEXT: retq 1485; 1486; AVX1-LABEL: icmp1_v16i16_v16i1: 1487; AVX1: # %bb.0: 1488; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 1489; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1 1490; AVX1-NEXT: vptest %ymm1, %ymm0 1491; AVX1-NEXT: setb %al 1492; AVX1-NEXT: vzeroupper 1493; AVX1-NEXT: retq 1494; 1495; AVX2-LABEL: icmp1_v16i16_v16i1: 1496; AVX2: # %bb.0: 1497; AVX2-NEXT: vpcmpeqd 
%ymm1, %ymm1, %ymm1 1498; AVX2-NEXT: vptest %ymm1, %ymm0 1499; AVX2-NEXT: setb %al 1500; AVX2-NEXT: vzeroupper 1501; AVX2-NEXT: retq 1502; 1503; AVX512-LABEL: icmp1_v16i16_v16i1: 1504; AVX512: # %bb.0: 1505; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 1506; AVX512-NEXT: vptest %ymm1, %ymm0 1507; AVX512-NEXT: setb %al 1508; AVX512-NEXT: vzeroupper 1509; AVX512-NEXT: retq 1510 %a = icmp eq <16 x i16> %0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> 1511 %b = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a) 1512 ret i1 %b 1513} 1514 1515define i1 @icmp1_v32i8_v32i1(<32 x i8>) nounwind { 1516; SSE2-LABEL: icmp1_v32i8_v32i1: 1517; SSE2: # %bb.0: 1518; SSE2-NEXT: pand %xmm1, %xmm0 1519; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 1520; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 1521; SSE2-NEXT: pmovmskb %xmm1, %eax 1522; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF 1523; SSE2-NEXT: sete %al 1524; SSE2-NEXT: ret{{[l|q]}} 1525; 1526; SSE41-LABEL: icmp1_v32i8_v32i1: 1527; SSE41: # %bb.0: 1528; SSE41-NEXT: pand %xmm1, %xmm0 1529; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 1530; SSE41-NEXT: ptest %xmm1, %xmm0 1531; SSE41-NEXT: setb %al 1532; SSE41-NEXT: retq 1533; 1534; AVX1-LABEL: icmp1_v32i8_v32i1: 1535; AVX1: # %bb.0: 1536; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 1537; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1 1538; AVX1-NEXT: vptest %ymm1, %ymm0 1539; AVX1-NEXT: setb %al 1540; AVX1-NEXT: vzeroupper 1541; AVX1-NEXT: retq 1542; 1543; AVX2-LABEL: icmp1_v32i8_v32i1: 1544; AVX2: # %bb.0: 1545; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 1546; AVX2-NEXT: vptest %ymm1, %ymm0 1547; AVX2-NEXT: setb %al 1548; AVX2-NEXT: vzeroupper 1549; AVX2-NEXT: retq 1550; 1551; AVX512-LABEL: icmp1_v32i8_v32i1: 1552; AVX512: # %bb.0: 1553; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 1554; AVX512-NEXT: vptest %ymm1, %ymm0 1555; AVX512-NEXT: setb %al 1556; AVX512-NEXT: vzeroupper 1557; AVX512-NEXT: retq 1558 %a = icmp eq <32 x i8> %0, <i8 -1, i8 -1, i8 -1, 
i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 1559 %b = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %a) 1560 ret i1 %b 1561} 1562 1563define i1 @icmp1_v8i64_v8i1(<8 x i64>) nounwind { 1564; X86-SSE2-LABEL: icmp1_v8i64_v8i1: 1565; X86-SSE2: # %bb.0: 1566; X86-SSE2-NEXT: pushl %ebp 1567; X86-SSE2-NEXT: movl %esp, %ebp 1568; X86-SSE2-NEXT: andl $-16, %esp 1569; X86-SSE2-NEXT: subl $16, %esp 1570; X86-SSE2-NEXT: pand %xmm2, %xmm0 1571; X86-SSE2-NEXT: pand 8(%ebp), %xmm1 1572; X86-SSE2-NEXT: pand %xmm0, %xmm1 1573; X86-SSE2-NEXT: pcmpeqd %xmm0, %xmm0 1574; X86-SSE2-NEXT: pcmpeqd %xmm1, %xmm0 1575; X86-SSE2-NEXT: movmskps %xmm0, %eax 1576; X86-SSE2-NEXT: xorl $15, %eax 1577; X86-SSE2-NEXT: sete %al 1578; X86-SSE2-NEXT: movl %ebp, %esp 1579; X86-SSE2-NEXT: popl %ebp 1580; X86-SSE2-NEXT: retl 1581; 1582; X64-SSE2-LABEL: icmp1_v8i64_v8i1: 1583; X64-SSE2: # %bb.0: 1584; X64-SSE2-NEXT: pand %xmm3, %xmm1 1585; X64-SSE2-NEXT: pand %xmm2, %xmm0 1586; X64-SSE2-NEXT: pand %xmm1, %xmm0 1587; X64-SSE2-NEXT: pcmpeqd %xmm1, %xmm1 1588; X64-SSE2-NEXT: pcmpeqd %xmm0, %xmm1 1589; X64-SSE2-NEXT: movmskps %xmm1, %eax 1590; X64-SSE2-NEXT: xorl $15, %eax 1591; X64-SSE2-NEXT: sete %al 1592; X64-SSE2-NEXT: retq 1593; 1594; SSE41-LABEL: icmp1_v8i64_v8i1: 1595; SSE41: # %bb.0: 1596; SSE41-NEXT: pand %xmm3, %xmm1 1597; SSE41-NEXT: pand %xmm2, %xmm0 1598; SSE41-NEXT: pand %xmm1, %xmm0 1599; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 1600; SSE41-NEXT: ptest %xmm1, %xmm0 1601; SSE41-NEXT: setb %al 1602; SSE41-NEXT: retq 1603; 1604; AVX1-LABEL: icmp1_v8i64_v8i1: 1605; AVX1: # %bb.0: 1606; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0 1607; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 1608; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1 1609; AVX1-NEXT: vptest %ymm1, %ymm0 1610; AVX1-NEXT: setb %al 1611; AVX1-NEXT: vzeroupper 1612; AVX1-NEXT: retq 1613; 1614; AVX2-LABEL: 
icmp1_v8i64_v8i1: 1615; AVX2: # %bb.0: 1616; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 1617; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 1618; AVX2-NEXT: vptest %ymm1, %ymm0 1619; AVX2-NEXT: setb %al 1620; AVX2-NEXT: vzeroupper 1621; AVX2-NEXT: retq 1622; 1623; AVX512-LABEL: icmp1_v8i64_v8i1: 1624; AVX512: # %bb.0: 1625; AVX512-NEXT: vpternlogd {{.*#+}} zmm1 = -1 1626; AVX512-NEXT: vpcmpneqd %zmm1, %zmm0, %k0 1627; AVX512-NEXT: kortestw %k0, %k0 1628; AVX512-NEXT: sete %al 1629; AVX512-NEXT: vzeroupper 1630; AVX512-NEXT: retq 1631 %a = icmp eq <8 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1> 1632 %b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a) 1633 ret i1 %b 1634} 1635 1636define i1 @icmp1_v16i32_v16i1(<16 x i32>) nounwind { 1637; X86-SSE2-LABEL: icmp1_v16i32_v16i1: 1638; X86-SSE2: # %bb.0: 1639; X86-SSE2-NEXT: pushl %ebp 1640; X86-SSE2-NEXT: movl %esp, %ebp 1641; X86-SSE2-NEXT: andl $-16, %esp 1642; X86-SSE2-NEXT: subl $16, %esp 1643; X86-SSE2-NEXT: pand %xmm2, %xmm0 1644; X86-SSE2-NEXT: pand 8(%ebp), %xmm1 1645; X86-SSE2-NEXT: pand %xmm0, %xmm1 1646; X86-SSE2-NEXT: pcmpeqd %xmm0, %xmm0 1647; X86-SSE2-NEXT: pcmpeqd %xmm1, %xmm0 1648; X86-SSE2-NEXT: movmskps %xmm0, %eax 1649; X86-SSE2-NEXT: xorl $15, %eax 1650; X86-SSE2-NEXT: sete %al 1651; X86-SSE2-NEXT: movl %ebp, %esp 1652; X86-SSE2-NEXT: popl %ebp 1653; X86-SSE2-NEXT: retl 1654; 1655; X64-SSE2-LABEL: icmp1_v16i32_v16i1: 1656; X64-SSE2: # %bb.0: 1657; X64-SSE2-NEXT: pand %xmm3, %xmm1 1658; X64-SSE2-NEXT: pand %xmm2, %xmm0 1659; X64-SSE2-NEXT: pand %xmm1, %xmm0 1660; X64-SSE2-NEXT: pcmpeqd %xmm1, %xmm1 1661; X64-SSE2-NEXT: pcmpeqd %xmm0, %xmm1 1662; X64-SSE2-NEXT: movmskps %xmm1, %eax 1663; X64-SSE2-NEXT: xorl $15, %eax 1664; X64-SSE2-NEXT: sete %al 1665; X64-SSE2-NEXT: retq 1666; 1667; SSE41-LABEL: icmp1_v16i32_v16i1: 1668; SSE41: # %bb.0: 1669; SSE41-NEXT: pand %xmm3, %xmm1 1670; SSE41-NEXT: pand %xmm2, %xmm0 1671; SSE41-NEXT: pand %xmm1, %xmm0 1672; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 
1673; SSE41-NEXT: ptest %xmm1, %xmm0 1674; SSE41-NEXT: setb %al 1675; SSE41-NEXT: retq 1676; 1677; AVX1-LABEL: icmp1_v16i32_v16i1: 1678; AVX1: # %bb.0: 1679; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0 1680; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 1681; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1 1682; AVX1-NEXT: vptest %ymm1, %ymm0 1683; AVX1-NEXT: setb %al 1684; AVX1-NEXT: vzeroupper 1685; AVX1-NEXT: retq 1686; 1687; AVX2-LABEL: icmp1_v16i32_v16i1: 1688; AVX2: # %bb.0: 1689; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 1690; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 1691; AVX2-NEXT: vptest %ymm1, %ymm0 1692; AVX2-NEXT: setb %al 1693; AVX2-NEXT: vzeroupper 1694; AVX2-NEXT: retq 1695; 1696; AVX512-LABEL: icmp1_v16i32_v16i1: 1697; AVX512: # %bb.0: 1698; AVX512-NEXT: vpternlogd {{.*#+}} zmm1 = -1 1699; AVX512-NEXT: vpcmpneqd %zmm1, %zmm0, %k0 1700; AVX512-NEXT: kortestw %k0, %k0 1701; AVX512-NEXT: sete %al 1702; AVX512-NEXT: vzeroupper 1703; AVX512-NEXT: retq 1704 %a = icmp eq <16 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> 1705 %b = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a) 1706 ret i1 %b 1707} 1708 1709define i1 @icmp1_v32i16_v32i1(<32 x i16>) nounwind { 1710; X86-SSE2-LABEL: icmp1_v32i16_v32i1: 1711; X86-SSE2: # %bb.0: 1712; X86-SSE2-NEXT: pushl %ebp 1713; X86-SSE2-NEXT: movl %esp, %ebp 1714; X86-SSE2-NEXT: andl $-16, %esp 1715; X86-SSE2-NEXT: subl $16, %esp 1716; X86-SSE2-NEXT: pand %xmm2, %xmm0 1717; X86-SSE2-NEXT: pand 8(%ebp), %xmm1 1718; X86-SSE2-NEXT: pand %xmm0, %xmm1 1719; X86-SSE2-NEXT: pcmpeqd %xmm0, %xmm0 1720; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 1721; X86-SSE2-NEXT: pmovmskb %xmm0, %eax 1722; X86-SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF 1723; X86-SSE2-NEXT: sete %al 1724; X86-SSE2-NEXT: movl %ebp, %esp 1725; X86-SSE2-NEXT: popl %ebp 1726; X86-SSE2-NEXT: retl 1727; 1728; X64-SSE2-LABEL: icmp1_v32i16_v32i1: 1729; X64-SSE2: # %bb.0: 1730; X64-SSE2-NEXT: pand %xmm3, 
%xmm1 1731; X64-SSE2-NEXT: pand %xmm2, %xmm0 1732; X64-SSE2-NEXT: pand %xmm1, %xmm0 1733; X64-SSE2-NEXT: pcmpeqd %xmm1, %xmm1 1734; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1 1735; X64-SSE2-NEXT: pmovmskb %xmm1, %eax 1736; X64-SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF 1737; X64-SSE2-NEXT: sete %al 1738; X64-SSE2-NEXT: retq 1739; 1740; SSE41-LABEL: icmp1_v32i16_v32i1: 1741; SSE41: # %bb.0: 1742; SSE41-NEXT: pand %xmm3, %xmm1 1743; SSE41-NEXT: pand %xmm2, %xmm0 1744; SSE41-NEXT: pand %xmm1, %xmm0 1745; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 1746; SSE41-NEXT: ptest %xmm1, %xmm0 1747; SSE41-NEXT: setb %al 1748; SSE41-NEXT: retq 1749; 1750; AVX1-LABEL: icmp1_v32i16_v32i1: 1751; AVX1: # %bb.0: 1752; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0 1753; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 1754; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1 1755; AVX1-NEXT: vptest %ymm1, %ymm0 1756; AVX1-NEXT: setb %al 1757; AVX1-NEXT: vzeroupper 1758; AVX1-NEXT: retq 1759; 1760; AVX2-LABEL: icmp1_v32i16_v32i1: 1761; AVX2: # %bb.0: 1762; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 1763; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 1764; AVX2-NEXT: vptest %ymm1, %ymm0 1765; AVX2-NEXT: setb %al 1766; AVX2-NEXT: vzeroupper 1767; AVX2-NEXT: retq 1768; 1769; AVX512-LABEL: icmp1_v32i16_v32i1: 1770; AVX512: # %bb.0: 1771; AVX512-NEXT: vpternlogd {{.*#+}} zmm1 = -1 1772; AVX512-NEXT: vpcmpneqd %zmm1, %zmm0, %k0 1773; AVX512-NEXT: kortestw %k0, %k0 1774; AVX512-NEXT: sete %al 1775; AVX512-NEXT: vzeroupper 1776; AVX512-NEXT: retq 1777 %a = icmp eq <32 x i16> %0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> 1778 %b = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %a) 1779 ret i1 %b 1780} 1781 1782define i1 @icmp1_v64i8_v64i1(<64 x i8>) nounwind { 1783; X86-SSE2-LABEL: icmp1_v64i8_v64i1: 1784; X86-SSE2: # %bb.0: 1785; 
X86-SSE2-NEXT: pushl %ebp 1786; X86-SSE2-NEXT: movl %esp, %ebp 1787; X86-SSE2-NEXT: andl $-16, %esp 1788; X86-SSE2-NEXT: subl $16, %esp 1789; X86-SSE2-NEXT: pand %xmm2, %xmm0 1790; X86-SSE2-NEXT: pand 8(%ebp), %xmm1 1791; X86-SSE2-NEXT: pand %xmm0, %xmm1 1792; X86-SSE2-NEXT: pcmpeqd %xmm0, %xmm0 1793; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 1794; X86-SSE2-NEXT: pmovmskb %xmm0, %eax 1795; X86-SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF 1796; X86-SSE2-NEXT: sete %al 1797; X86-SSE2-NEXT: movl %ebp, %esp 1798; X86-SSE2-NEXT: popl %ebp 1799; X86-SSE2-NEXT: retl 1800; 1801; X64-SSE2-LABEL: icmp1_v64i8_v64i1: 1802; X64-SSE2: # %bb.0: 1803; X64-SSE2-NEXT: pand %xmm3, %xmm1 1804; X64-SSE2-NEXT: pand %xmm2, %xmm0 1805; X64-SSE2-NEXT: pand %xmm1, %xmm0 1806; X64-SSE2-NEXT: pcmpeqd %xmm1, %xmm1 1807; X64-SSE2-NEXT: pcmpeqb %xmm0, %xmm1 1808; X64-SSE2-NEXT: pmovmskb %xmm1, %eax 1809; X64-SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF 1810; X64-SSE2-NEXT: sete %al 1811; X64-SSE2-NEXT: retq 1812; 1813; SSE41-LABEL: icmp1_v64i8_v64i1: 1814; SSE41: # %bb.0: 1815; SSE41-NEXT: pand %xmm3, %xmm1 1816; SSE41-NEXT: pand %xmm2, %xmm0 1817; SSE41-NEXT: pand %xmm1, %xmm0 1818; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 1819; SSE41-NEXT: ptest %xmm1, %xmm0 1820; SSE41-NEXT: setb %al 1821; SSE41-NEXT: retq 1822; 1823; AVX1-LABEL: icmp1_v64i8_v64i1: 1824; AVX1: # %bb.0: 1825; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0 1826; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 1827; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1 1828; AVX1-NEXT: vptest %ymm1, %ymm0 1829; AVX1-NEXT: setb %al 1830; AVX1-NEXT: vzeroupper 1831; AVX1-NEXT: retq 1832; 1833; AVX2-LABEL: icmp1_v64i8_v64i1: 1834; AVX2: # %bb.0: 1835; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 1836; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 1837; AVX2-NEXT: vptest %ymm1, %ymm0 1838; AVX2-NEXT: setb %al 1839; AVX2-NEXT: vzeroupper 1840; AVX2-NEXT: retq 1841; 1842; AVX512-LABEL: icmp1_v64i8_v64i1: 1843; AVX512: # %bb.0: 1844; AVX512-NEXT: vpternlogd {{.*#+}} zmm1 = -1 1845; AVX512-NEXT: 
vpcmpneqd %zmm1, %zmm0, %k0 1846; AVX512-NEXT: kortestw %k0, %k0 1847; AVX512-NEXT: sete %al 1848; AVX512-NEXT: vzeroupper 1849; AVX512-NEXT: retq 1850 %a = icmp eq <64 x i8> %0, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 1851 %b = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> %a) 1852 ret i1 %b 1853} 1854 1855define i8 @icmp1_v8i1(<8 x i8>) nounwind { 1856; SSE2-LABEL: icmp1_v8i1: 1857; SSE2: # %bb.0: 1858; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1859; SSE2-NEXT: psllw $15, %xmm0 1860; SSE2-NEXT: packsswb %xmm0, %xmm0 1861; SSE2-NEXT: pmovmskb %xmm0, %eax 1862; SSE2-NEXT: cmpb $-1, %al 1863; SSE2-NEXT: sete %al 1864; SSE2-NEXT: ret{{[l|q]}} 1865; 1866; SSE41-LABEL: icmp1_v8i1: 1867; SSE41: # %bb.0: 1868; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1869; SSE41-NEXT: psllw $15, %xmm0 1870; SSE41-NEXT: packsswb %xmm0, %xmm0 1871; SSE41-NEXT: pmovmskb %xmm0, %eax 1872; SSE41-NEXT: cmpb $-1, %al 1873; SSE41-NEXT: sete %al 1874; SSE41-NEXT: retq 1875; 1876; AVX1OR2-LABEL: icmp1_v8i1: 1877; AVX1OR2: # %bb.0: 1878; AVX1OR2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1879; AVX1OR2-NEXT: vpsllw $15, %xmm0, %xmm0 1880; AVX1OR2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 1881; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax 1882; AVX1OR2-NEXT: cmpb $-1, %al 1883; AVX1OR2-NEXT: sete %al 1884; AVX1OR2-NEXT: retq 1885; 1886; AVX512F-LABEL: icmp1_v8i1: 1887; AVX512F: # %bb.0: 
1888; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 1889; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 1890; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 1891; AVX512F-NEXT: kmovw %k0, %eax 1892; AVX512F-NEXT: cmpb $-1, %al 1893; AVX512F-NEXT: sete %al 1894; AVX512F-NEXT: vzeroupper 1895; AVX512F-NEXT: retq 1896; 1897; AVX512BW-LABEL: icmp1_v8i1: 1898; AVX512BW: # %bb.0: 1899; AVX512BW-NEXT: vpsllw $7, %xmm0, %xmm0 1900; AVX512BW-NEXT: vpmovb2m %zmm0, %k0 1901; AVX512BW-NEXT: kmovd %k0, %eax 1902; AVX512BW-NEXT: cmpb $-1, %al 1903; AVX512BW-NEXT: sete %al 1904; AVX512BW-NEXT: vzeroupper 1905; AVX512BW-NEXT: retq 1906; 1907; AVX512VL-LABEL: icmp1_v8i1: 1908; AVX512VL: # %bb.0: 1909; AVX512VL-NEXT: vpsllw $7, %xmm0, %xmm0 1910; AVX512VL-NEXT: vpmovb2m %xmm0, %k0 1911; AVX512VL-NEXT: kmovd %k0, %eax 1912; AVX512VL-NEXT: cmpb $-1, %al 1913; AVX512VL-NEXT: sete %al 1914; AVX512VL-NEXT: retq 1915 %a = trunc <8 x i8> %0 to <8 x i1> 1916 %b = icmp eq <8 x i1> %a, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1> 1917 %c = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %b) 1918 %d = zext i1 %c to i8 1919 ret i8 %d 1920} 1921 1922; 1923; Comparison 1924; 1925 1926define i1 @icmp_v2i64_v2i1(<2 x i64>, <2 x i64>) nounwind { 1927; SSE2-LABEL: icmp_v2i64_v2i1: 1928; SSE2: # %bb.0: 1929; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 1930; SSE2-NEXT: movmskps %xmm0, %eax 1931; SSE2-NEXT: xorl $15, %eax 1932; SSE2-NEXT: sete %al 1933; SSE2-NEXT: ret{{[l|q]}} 1934; 1935; SSE41-LABEL: icmp_v2i64_v2i1: 1936; SSE41: # %bb.0: 1937; SSE41-NEXT: pxor %xmm1, %xmm0 1938; SSE41-NEXT: ptest %xmm0, %xmm0 1939; SSE41-NEXT: sete %al 1940; SSE41-NEXT: retq 1941; 1942; AVX-LABEL: icmp_v2i64_v2i1: 1943; AVX: # %bb.0: 1944; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 1945; AVX-NEXT: vptest %xmm0, %xmm0 1946; AVX-NEXT: sete %al 1947; AVX-NEXT: retq 1948 %a = icmp eq <2 x i64> %0, %1 1949 %b = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %a) 1950 ret i1 %b 1951} 1952 1953define i1 @icmp_v4i32_v4i1(<4 x i32>, <4 x i32>) nounwind { 
1954; SSE2-LABEL: icmp_v4i32_v4i1: 1955; SSE2: # %bb.0: 1956; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 1957; SSE2-NEXT: movmskps %xmm0, %eax 1958; SSE2-NEXT: xorl $15, %eax 1959; SSE2-NEXT: sete %al 1960; SSE2-NEXT: ret{{[l|q]}} 1961; 1962; SSE41-LABEL: icmp_v4i32_v4i1: 1963; SSE41: # %bb.0: 1964; SSE41-NEXT: pxor %xmm1, %xmm0 1965; SSE41-NEXT: ptest %xmm0, %xmm0 1966; SSE41-NEXT: sete %al 1967; SSE41-NEXT: retq 1968; 1969; AVX-LABEL: icmp_v4i32_v4i1: 1970; AVX: # %bb.0: 1971; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 1972; AVX-NEXT: vptest %xmm0, %xmm0 1973; AVX-NEXT: sete %al 1974; AVX-NEXT: retq 1975 %a = icmp eq <4 x i32> %0, %1 1976 %b = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %a) 1977 ret i1 %b 1978} 1979 1980define i1 @icmp_v8i16_v8i1(<8 x i16>, <8 x i16>) nounwind { 1981; SSE2-LABEL: icmp_v8i16_v8i1: 1982; SSE2: # %bb.0: 1983; SSE2-NEXT: pcmpeqb %xmm1, %xmm0 1984; SSE2-NEXT: pmovmskb %xmm0, %eax 1985; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF 1986; SSE2-NEXT: sete %al 1987; SSE2-NEXT: ret{{[l|q]}} 1988; 1989; SSE41-LABEL: icmp_v8i16_v8i1: 1990; SSE41: # %bb.0: 1991; SSE41-NEXT: pxor %xmm1, %xmm0 1992; SSE41-NEXT: ptest %xmm0, %xmm0 1993; SSE41-NEXT: sete %al 1994; SSE41-NEXT: retq 1995; 1996; AVX-LABEL: icmp_v8i16_v8i1: 1997; AVX: # %bb.0: 1998; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 1999; AVX-NEXT: vptest %xmm0, %xmm0 2000; AVX-NEXT: sete %al 2001; AVX-NEXT: retq 2002 %a = icmp eq <8 x i16> %0, %1 2003 %b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a) 2004 ret i1 %b 2005} 2006 2007define i1 @icmp_v16i8_v16i1(<16 x i8>, <16 x i8>) nounwind { 2008; SSE2-LABEL: icmp_v16i8_v16i1: 2009; SSE2: # %bb.0: 2010; SSE2-NEXT: pcmpeqb %xmm1, %xmm0 2011; SSE2-NEXT: pmovmskb %xmm0, %eax 2012; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF 2013; SSE2-NEXT: sete %al 2014; SSE2-NEXT: ret{{[l|q]}} 2015; 2016; SSE41-LABEL: icmp_v16i8_v16i1: 2017; SSE41: # %bb.0: 2018; SSE41-NEXT: pxor %xmm1, %xmm0 2019; SSE41-NEXT: ptest %xmm0, %xmm0 2020; SSE41-NEXT: sete %al 2021; SSE41-NEXT: retq 
;
; AVX-LABEL: icmp_v16i8_v16i1:
; AVX: # %bb.0:
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vptest %xmm0, %xmm0
; AVX-NEXT: sete %al
; AVX-NEXT: retq
  %a = icmp eq <16 x i8> %0, %1
  %b = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a)
  ret i1 %b
}

; Equality of two <4 x i64> vectors reduced to a single i1: icmp eq yields a
; <4 x i1> mask and llvm.vector.reduce.and folds it, so the result is true only
; when every lane of %0 equals the corresponding lane of %1.
; Expected lowering per the checks below: the SSE2 runs compare with pcmpeqd
; and test the movmskps mask against 15; the SSE4.1, AVX and AVX-512 runs xor
; the operands and use ptest/vptest on the result.
define i1 @icmp_v4i64_v4i1(<4 x i64>, <4 x i64>) nounwind {
; X86-SSE2-LABEL: icmp_v4i64_v4i1:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %ebp
; X86-SSE2-NEXT: movl %esp, %ebp
; X86-SSE2-NEXT: andl $-16, %esp
; X86-SSE2-NEXT: subl $16, %esp
; X86-SSE2-NEXT: pcmpeqd %xmm2, %xmm0
; X86-SSE2-NEXT: pcmpeqd 8(%ebp), %xmm1
; X86-SSE2-NEXT: pand %xmm0, %xmm1
; X86-SSE2-NEXT: movmskps %xmm1, %eax
; X86-SSE2-NEXT: xorl $15, %eax
; X86-SSE2-NEXT: sete %al
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
;
; X64-SSE2-LABEL: icmp_v4i64_v4i1:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: pcmpeqd %xmm3, %xmm1
; X64-SSE2-NEXT: pcmpeqd %xmm2, %xmm0
; X64-SSE2-NEXT: pand %xmm1, %xmm0
; X64-SSE2-NEXT: movmskps %xmm0, %eax
; X64-SSE2-NEXT: xorl $15, %eax
; X64-SSE2-NEXT: sete %al
; X64-SSE2-NEXT: retq
;
; SSE41-LABEL: icmp_v4i64_v4i1:
; SSE41: # %bb.0:
; SSE41-NEXT: pxor %xmm3, %xmm1
; SSE41-NEXT: pxor %xmm2, %xmm0
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: ptest %xmm0, %xmm0
; SSE41-NEXT: sete %al
; SSE41-NEXT: retq
;
; AVX1-LABEL: icmp_v4i64_v4i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vptest %ymm0, %ymm0
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: icmp_v4i64_v4i1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vptest %ymm0, %ymm0
; AVX2-NEXT: sete %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: icmp_v4i64_v4i1:
; AVX512: # %bb.0:
; AVX512-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vptest %ymm0, %ymm0
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %a = icmp eq <4 x i64> %0, %1
  %b = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %a)
  ret i1 %b
}

; Same all-lanes-equal reduction for <8 x i32> operands (mask type <8 x i1>).
; The expected instruction sequences are the same as those checked for
; icmp_v4i64_v4i1: pcmpeqd + movmskps on the SSE2 runs, xor + ptest/vptest
; on the others.
define i1 @icmp_v8i32_v8i1(<8 x i32>, <8 x i32>) nounwind {
; X86-SSE2-LABEL: icmp_v8i32_v8i1:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %ebp
; X86-SSE2-NEXT: movl %esp, %ebp
; X86-SSE2-NEXT: andl $-16, %esp
; X86-SSE2-NEXT: subl $16, %esp
; X86-SSE2-NEXT: pcmpeqd %xmm2, %xmm0
; X86-SSE2-NEXT: pcmpeqd 8(%ebp), %xmm1
; X86-SSE2-NEXT: pand %xmm0, %xmm1
; X86-SSE2-NEXT: movmskps %xmm1, %eax
; X86-SSE2-NEXT: xorl $15, %eax
; X86-SSE2-NEXT: sete %al
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
;
; X64-SSE2-LABEL: icmp_v8i32_v8i1:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: pcmpeqd %xmm3, %xmm1
; X64-SSE2-NEXT: pcmpeqd %xmm2, %xmm0
; X64-SSE2-NEXT: pand %xmm1, %xmm0
; X64-SSE2-NEXT: movmskps %xmm0, %eax
; X64-SSE2-NEXT: xorl $15, %eax
; X64-SSE2-NEXT: sete %al
; X64-SSE2-NEXT: retq
;
; SSE41-LABEL: icmp_v8i32_v8i1:
; SSE41: # %bb.0:
; SSE41-NEXT: pxor %xmm3, %xmm1
; SSE41-NEXT: pxor %xmm2, %xmm0
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: ptest %xmm0, %xmm0
; SSE41-NEXT: sete %al
; SSE41-NEXT: retq
;
; AVX1-LABEL: icmp_v8i32_v8i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vptest %ymm0, %ymm0
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: icmp_v8i32_v8i1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vptest %ymm0, %ymm0
; AVX2-NEXT: sete %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: icmp_v8i32_v8i1:
; AVX512: # %bb.0:
; AVX512-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vptest %ymm0, %ymm0
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %a = icmp eq <8 x i32> %0, %1
  %b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a)
  ret i1 %b
}

; All-lanes-equal reduction for <16 x i16> operands (mask type <16 x i1>).
; On the SSE2 runs the checks expect a byte-wise compare (pcmpeqb) whose
; pmovmskb mask is tested against 0xFFFF; the remaining runs xor the operands
; and use ptest/vptest.
define i1 @icmp_v16i16_v16i1(<16 x i16>, <16 x i16>) nounwind {
; X86-SSE2-LABEL: icmp_v16i16_v16i1:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %ebp
; X86-SSE2-NEXT: movl %esp, %ebp
; X86-SSE2-NEXT: andl $-16, %esp
; X86-SSE2-NEXT: subl $16, %esp
; X86-SSE2-NEXT: pcmpeqb %xmm2, %xmm0
; X86-SSE2-NEXT: pcmpeqb 8(%ebp), %xmm1
; X86-SSE2-NEXT: pand %xmm0, %xmm1
; X86-SSE2-NEXT: pmovmskb %xmm1, %eax
; X86-SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF
; X86-SSE2-NEXT: sete %al
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
;
; X64-SSE2-LABEL: icmp_v16i16_v16i1:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: pcmpeqb %xmm3, %xmm1
; X64-SSE2-NEXT: pcmpeqb %xmm2, %xmm0
; X64-SSE2-NEXT: pand %xmm1, %xmm0
; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
; X64-SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF
; X64-SSE2-NEXT: sete %al
; X64-SSE2-NEXT: retq
;
; SSE41-LABEL: icmp_v16i16_v16i1:
; SSE41: # %bb.0:
; SSE41-NEXT: pxor %xmm3, %xmm1
; SSE41-NEXT: pxor %xmm2, %xmm0
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: ptest %xmm0, %xmm0
; SSE41-NEXT: sete %al
; SSE41-NEXT: retq
;
; AVX1-LABEL: icmp_v16i16_v16i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vptest %ymm0, %ymm0
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: icmp_v16i16_v16i1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vptest %ymm0, %ymm0
; AVX2-NEXT: sete %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: icmp_v16i16_v16i1:
; AVX512: # %bb.0:
; AVX512-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vptest %ymm0, %ymm0
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %a = icmp eq <16 x i16> %0, %1
  %b = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a)
  ret i1 %b
}

; All-lanes-equal reduction for <32 x i8> operands (mask type <32 x i1>).
; The expected instruction sequences are the same as those checked for
; icmp_v16i16_v16i1.
define i1 @icmp_v32i8_v32i1(<32 x i8>, <32 x i8>) nounwind {
; X86-SSE2-LABEL: icmp_v32i8_v32i1:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %ebp
; X86-SSE2-NEXT: movl %esp, %ebp
; X86-SSE2-NEXT: andl $-16, %esp
; X86-SSE2-NEXT: subl $16, %esp
; X86-SSE2-NEXT: pcmpeqb %xmm2, %xmm0
; X86-SSE2-NEXT: pcmpeqb 8(%ebp), %xmm1
; X86-SSE2-NEXT: pand %xmm0, %xmm1
; X86-SSE2-NEXT: pmovmskb %xmm1, %eax
; X86-SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF
; X86-SSE2-NEXT: sete %al
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
;
; X64-SSE2-LABEL: icmp_v32i8_v32i1:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: pcmpeqb %xmm3, %xmm1
; X64-SSE2-NEXT: pcmpeqb %xmm2, %xmm0
; X64-SSE2-NEXT: pand %xmm1, %xmm0
; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
; X64-SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF
; X64-SSE2-NEXT: sete %al
; X64-SSE2-NEXT: retq
;
; SSE41-LABEL: icmp_v32i8_v32i1:
; SSE41: # %bb.0:
; SSE41-NEXT: pxor %xmm3, %xmm1
; SSE41-NEXT: pxor %xmm2, %xmm0
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: ptest %xmm0, %xmm0
; SSE41-NEXT: sete %al
; SSE41-NEXT: retq
;
; AVX1-LABEL: icmp_v32i8_v32i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vptest %ymm0, %ymm0
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: icmp_v32i8_v32i1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vptest %ymm0, %ymm0
; AVX2-NEXT: sete %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: icmp_v32i8_v32i1:
; AVX512: # %bb.0:
; AVX512-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vptest %ymm0, %ymm0
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %a = icmp eq <32 x i8> %0, %1
  %b = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %a)
  ret i1 %b
}

; All-lanes-equal reduction for <8 x i64> operands (mask type <8 x i1>).
; The checks expect the partial results to be merged before a single final
; test: four pcmpeqd results combined with pand on the SSE2 runs (the 32-bit
; run takes several operands from memory addressed off %ebp), xor results
; combined with por/vorps/vpor ahead of ptest/vptest on the SSE4.1, AVX1 and
; AVX2 runs, and one vpcmpneqd on the zmm registers followed by kortestw on
; the AVX-512 runs.
define i1 @icmp_v8i64_v8i1(<8 x i64>, <8 x i64>) nounwind {
; X86-SSE2-LABEL: icmp_v8i64_v8i1:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %ebp
; X86-SSE2-NEXT: movl %esp, %ebp
; X86-SSE2-NEXT: andl $-16, %esp
; X86-SSE2-NEXT: subl $16, %esp
; X86-SSE2-NEXT: movdqa 8(%ebp), %xmm3
; X86-SSE2-NEXT: pcmpeqd 72(%ebp), %xmm3
; X86-SSE2-NEXT: pcmpeqd 40(%ebp), %xmm1
; X86-SSE2-NEXT: pand %xmm3, %xmm1
; X86-SSE2-NEXT: pcmpeqd 56(%ebp), %xmm2
; X86-SSE2-NEXT: pcmpeqd 24(%ebp), %xmm0
; X86-SSE2-NEXT: pand %xmm2, %xmm0
; X86-SSE2-NEXT: pand %xmm1, %xmm0
; X86-SSE2-NEXT: movmskps %xmm0, %eax
; X86-SSE2-NEXT: xorl $15, %eax
; X86-SSE2-NEXT: sete %al
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
;
; X64-SSE2-LABEL: icmp_v8i64_v8i1:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: pcmpeqd %xmm7, %xmm3
; X64-SSE2-NEXT: pcmpeqd %xmm5, %xmm1
; X64-SSE2-NEXT: pand %xmm3, %xmm1
; X64-SSE2-NEXT: pcmpeqd %xmm6, %xmm2
; X64-SSE2-NEXT: pcmpeqd %xmm4, %xmm0
; X64-SSE2-NEXT: pand %xmm2, %xmm0
; X64-SSE2-NEXT: pand %xmm1, %xmm0
; X64-SSE2-NEXT: movmskps %xmm0, %eax
; X64-SSE2-NEXT: xorl $15, %eax
; X64-SSE2-NEXT: sete %al
; X64-SSE2-NEXT: retq
;
; SSE41-LABEL: icmp_v8i64_v8i1:
; SSE41: # %bb.0:
; SSE41-NEXT: pxor %xmm7, %xmm3
; SSE41-NEXT: pxor %xmm5, %xmm1
; SSE41-NEXT: por %xmm3, %xmm1
; SSE41-NEXT: pxor %xmm6, %xmm2
; SSE41-NEXT: pxor %xmm4, %xmm0
; SSE41-NEXT: por %xmm2, %xmm0
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: ptest %xmm0, %xmm0
; SSE41-NEXT: sete %al
; SSE41-NEXT: retq
;
; AVX1-LABEL: icmp_v8i64_v8i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vxorps %ymm3, %ymm1, %ymm1
; AVX1-NEXT: vxorps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vptest %ymm0, %ymm0
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: icmp_v8i64_v8i1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vptest %ymm0, %ymm0
; AVX2-NEXT: sete %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: icmp_v8i64_v8i1:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
; AVX512-NEXT: kortestw %k0, %k0
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %a = icmp eq <8 x i64> %0, %1
  %b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a)
  ret i1 %b
}

; All-lanes-equal reduction for <16 x i32> operands (mask type <16 x i1>).
; The expected instruction sequences are the same as those checked for
; icmp_v8i64_v8i1.
define i1 @icmp_v16i32_v16i1(<16 x i32>, <16 x i32>) nounwind {
; X86-SSE2-LABEL: icmp_v16i32_v16i1:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %ebp
; X86-SSE2-NEXT: movl %esp, %ebp
; X86-SSE2-NEXT: andl $-16, %esp
; X86-SSE2-NEXT: subl $16, %esp
; X86-SSE2-NEXT: movdqa 8(%ebp), %xmm3
; X86-SSE2-NEXT: pcmpeqd 72(%ebp), %xmm3
; X86-SSE2-NEXT: pcmpeqd 40(%ebp), %xmm1
; X86-SSE2-NEXT: pand %xmm3, %xmm1
; X86-SSE2-NEXT: pcmpeqd 56(%ebp), %xmm2
; X86-SSE2-NEXT: pcmpeqd 24(%ebp), %xmm0
; X86-SSE2-NEXT: pand %xmm2, %xmm0
; X86-SSE2-NEXT: pand %xmm1, %xmm0
; X86-SSE2-NEXT: movmskps %xmm0, %eax
; X86-SSE2-NEXT: xorl $15, %eax
; X86-SSE2-NEXT: sete %al
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
;
; X64-SSE2-LABEL: icmp_v16i32_v16i1:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: pcmpeqd %xmm7, %xmm3
; X64-SSE2-NEXT: pcmpeqd %xmm5, %xmm1
; X64-SSE2-NEXT: pand %xmm3, %xmm1
; X64-SSE2-NEXT: pcmpeqd %xmm6, %xmm2
; X64-SSE2-NEXT: pcmpeqd %xmm4, %xmm0
; X64-SSE2-NEXT: pand %xmm2, %xmm0
; X64-SSE2-NEXT: pand %xmm1, %xmm0
; X64-SSE2-NEXT: movmskps %xmm0, %eax
; X64-SSE2-NEXT: xorl $15, %eax
; X64-SSE2-NEXT: sete %al
; X64-SSE2-NEXT: retq
;
; SSE41-LABEL: icmp_v16i32_v16i1:
; SSE41: # %bb.0:
; SSE41-NEXT: pxor %xmm7, %xmm3
; SSE41-NEXT: pxor %xmm5, %xmm1
; SSE41-NEXT: por %xmm3, %xmm1
; SSE41-NEXT: pxor %xmm6, %xmm2
; SSE41-NEXT: pxor %xmm4, %xmm0
; SSE41-NEXT: por %xmm2, %xmm0
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: ptest %xmm0, %xmm0
; SSE41-NEXT: sete %al
; SSE41-NEXT: retq
;
; AVX1-LABEL: icmp_v16i32_v16i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vxorps %ymm3, %ymm1, %ymm1
; AVX1-NEXT: vxorps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vptest %ymm0, %ymm0
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: icmp_v16i32_v16i1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vptest %ymm0, %ymm0
; AVX2-NEXT: sete %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: icmp_v16i32_v16i1:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
; AVX512-NEXT: kortestw %k0, %k0
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %a = icmp eq <16 x i32> %0, %1
  %b = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a)
  ret i1 %b
}

; All-lanes-equal reduction for <32 x i16> operands (mask type <32 x i1>).
; The SSE2 runs are checked for byte-wise compares (pcmpeqb) merged with pand
; and a pmovmskb mask tested against 0xFFFF; the SSE4.1, AVX1 and AVX2 runs
; for merged xor results fed to ptest/vptest; the AVX-512 runs for vpcmpneqd
; on the zmm registers followed by kortestw.
define i1 @icmp_v32i16_v32i1(<32 x i16>, <32 x i16>) nounwind {
; X86-SSE2-LABEL: icmp_v32i16_v32i1:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %ebp
; X86-SSE2-NEXT: movl %esp, %ebp
; X86-SSE2-NEXT: andl $-16, %esp
; X86-SSE2-NEXT: subl $16, %esp
; X86-SSE2-NEXT: movdqa 8(%ebp), %xmm3
; X86-SSE2-NEXT: pcmpeqb 72(%ebp), %xmm3
; X86-SSE2-NEXT: pcmpeqb 40(%ebp), %xmm1
; X86-SSE2-NEXT: pand %xmm3, %xmm1
; X86-SSE2-NEXT: pcmpeqb 56(%ebp), %xmm2
; X86-SSE2-NEXT: pcmpeqb 24(%ebp), %xmm0
; X86-SSE2-NEXT: pand %xmm2, %xmm0
; X86-SSE2-NEXT: pand %xmm1, %xmm0
; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
; X86-SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF
; X86-SSE2-NEXT: sete %al
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
;
; X64-SSE2-LABEL: icmp_v32i16_v32i1:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: pcmpeqb %xmm7, %xmm3
; X64-SSE2-NEXT: pcmpeqb %xmm5, %xmm1
; X64-SSE2-NEXT: pand %xmm3, %xmm1
; X64-SSE2-NEXT: pcmpeqb %xmm6, %xmm2
; X64-SSE2-NEXT: pcmpeqb %xmm4, %xmm0
; X64-SSE2-NEXT: pand %xmm2, %xmm0
; X64-SSE2-NEXT: pand %xmm1, %xmm0
; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
; X64-SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF
; X64-SSE2-NEXT: sete %al
; X64-SSE2-NEXT: retq
;
; SSE41-LABEL: icmp_v32i16_v32i1:
; SSE41: # %bb.0:
; SSE41-NEXT: pxor %xmm7, %xmm3
; SSE41-NEXT: pxor %xmm5, %xmm1
; SSE41-NEXT: por %xmm3, %xmm1
; SSE41-NEXT: pxor %xmm6, %xmm2
; SSE41-NEXT: pxor %xmm4, %xmm0
; SSE41-NEXT: por %xmm2, %xmm0
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: ptest %xmm0, %xmm0
; SSE41-NEXT: sete %al
; SSE41-NEXT: retq
;
; AVX1-LABEL: icmp_v32i16_v32i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vxorps %ymm3, %ymm1, %ymm1
; AVX1-NEXT: vxorps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vptest %ymm0, %ymm0
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: icmp_v32i16_v32i1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vptest %ymm0, %ymm0
; AVX2-NEXT: sete %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: icmp_v32i16_v32i1:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
; AVX512-NEXT: kortestw %k0, %k0
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %a = icmp eq <32 x i16> %0, %1
  %b = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %a)
  ret i1 %b
}

; All-lanes-equal reduction for <64 x i8> operands (mask type <64 x i1>).
; The expected instruction sequences are the same as those checked for
; icmp_v32i16_v32i1.
define i1 @icmp_v64i8_v64i1(<64 x i8>, <64 x i8>) nounwind {
; X86-SSE2-LABEL: icmp_v64i8_v64i1:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %ebp
; X86-SSE2-NEXT: movl %esp, %ebp
; X86-SSE2-NEXT: andl $-16, %esp
; X86-SSE2-NEXT: subl $16, %esp
; X86-SSE2-NEXT: movdqa 8(%ebp), %xmm3
; X86-SSE2-NEXT: pcmpeqb 72(%ebp), %xmm3
; X86-SSE2-NEXT: pcmpeqb 40(%ebp), %xmm1
; X86-SSE2-NEXT: pand %xmm3, %xmm1
; X86-SSE2-NEXT: pcmpeqb 56(%ebp), %xmm2
; X86-SSE2-NEXT: pcmpeqb 24(%ebp), %xmm0
; X86-SSE2-NEXT: pand %xmm2, %xmm0
; X86-SSE2-NEXT: pand %xmm1, %xmm0
; X86-SSE2-NEXT: pmovmskb %xmm0, %eax
; X86-SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF
; X86-SSE2-NEXT: sete %al
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
;
; X64-SSE2-LABEL: icmp_v64i8_v64i1:
; X64-SSE2: # %bb.0:
; X64-SSE2-NEXT: pcmpeqb %xmm7, %xmm3
; X64-SSE2-NEXT: pcmpeqb %xmm5, %xmm1
; X64-SSE2-NEXT: pand %xmm3, %xmm1
; X64-SSE2-NEXT: pcmpeqb %xmm6, %xmm2
; X64-SSE2-NEXT: pcmpeqb %xmm4, %xmm0
; X64-SSE2-NEXT: pand %xmm2, %xmm0
; X64-SSE2-NEXT: pand %xmm1, %xmm0
; X64-SSE2-NEXT: pmovmskb %xmm0, %eax
; X64-SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF
; X64-SSE2-NEXT: sete %al
; X64-SSE2-NEXT: retq
;
; SSE41-LABEL: icmp_v64i8_v64i1:
; SSE41: # %bb.0:
; SSE41-NEXT: pxor %xmm7, %xmm3
; SSE41-NEXT: pxor %xmm5, %xmm1
; SSE41-NEXT: por %xmm3, %xmm1
; SSE41-NEXT: pxor %xmm6, %xmm2
; SSE41-NEXT: pxor %xmm4, %xmm0
; SSE41-NEXT: por %xmm2, %xmm0
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: ptest %xmm0, %xmm0
; SSE41-NEXT: sete %al
; SSE41-NEXT: retq
;
; AVX1-LABEL: icmp_v64i8_v64i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vxorps %ymm3, %ymm1, %ymm1
; AVX1-NEXT: vxorps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vptest %ymm0, %ymm0
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: icmp_v64i8_v64i1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vptest %ymm0, %ymm0
; AVX2-NEXT: sete %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: icmp_v64i8_v64i1:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
; AVX512-NEXT: kortestw %k0, %k0
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %a = icmp eq <64 x i8> %0, %1
  %b = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> %a)
  ret i1 %b
}

; Declarations of the i1-vector AND-reduction intrinsics called by the test
; functions in this file, one per mask width.
declare i1 @llvm.vector.reduce.and.v2i1(<2 x i1>)
declare i1 @llvm.vector.reduce.and.v4i1(<4 x i1>)
declare i1 @llvm.vector.reduce.and.v8i1(<8 x i1>)
declare i1 @llvm.vector.reduce.and.v16i1(<16 x i1>)
declare i1 @llvm.vector.reduce.and.v32i1(<32 x i1>)
declare i1 @llvm.vector.reduce.and.v64i1(<64 x i1>)
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; SSE: {{.*}}
; X64-SSE: {{.*}}