; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+SSE2 | FileCheck %s --check-prefix=SSE2-SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+SSSE3 | FileCheck %s --check-prefix=SSE2-SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefix=AVX512BW

; (a > b) & (c > d) on signed <4 x i64> lanes, result bitcast from <4 x i1> to i4.
define i4 @v4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i64> %d) {
; SSE2-SSSE3-LABEL: v4i64:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm9 = [2147483648,2147483648]
; SSE2-SSSE3-NEXT:    pxor %xmm9, %xmm3
; SSE2-SSSE3-NEXT:    pxor %xmm9, %xmm1
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm10
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm3, %xmm10
; SSE2-SSSE3-NEXT:    pxor %xmm9, %xmm2
; SSE2-SSSE3-NEXT:    pxor %xmm9, %xmm0
; SSE2-SSSE3-NEXT:    movdqa %xmm0, %xmm8
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm2, %xmm8
; SSE2-SSSE3-NEXT:    movdqa %xmm8, %xmm11
; SSE2-SSSE3-NEXT:    shufps {{.*#+}} xmm11 = xmm11[0,2],xmm10[0,2]
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
; SSE2-SSSE3-NEXT:    andps %xmm11, %xmm0
; SSE2-SSSE3-NEXT:    shufps {{.*#+}} xmm8 = xmm8[1,3],xmm10[1,3]
; SSE2-SSSE3-NEXT:    orps %xmm0, %xmm8
; SSE2-SSSE3-NEXT:    pxor %xmm9, %xmm7
; SSE2-SSSE3-NEXT:    pxor %xmm9, %xmm5
; SSE2-SSSE3-NEXT:    movdqa %xmm5, %xmm0
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm7, %xmm0
; SSE2-SSSE3-NEXT:    pxor %xmm9, %xmm6
; SSE2-SSSE3-NEXT:    pxor %xmm9, %xmm4
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm6, %xmm1
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[0,2]
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm7, %xmm5
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm6, %xmm4
; SSE2-SSSE3-NEXT:    shufps {{.*#+}} xmm4 = xmm4[1,3],xmm5[1,3]
; SSE2-SSSE3-NEXT:    andps %xmm2, %xmm4
; SSE2-SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,3],xmm0[1,3]
; SSE2-SSSE3-NEXT:    orps %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    andps %xmm8, %xmm1
; SSE2-SSSE3-NEXT:    movmskps %xmm1, %eax
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm5, %xmm4
; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm2, %ymm2
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    vandpd %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vmovmskpd %ymm0, %eax
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtq %ymm3, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vmovmskpd %ymm0, %eax
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: v4i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpgtq %ymm1, %ymm0, %k1
; AVX512F-NEXT:    vpcmpgtq %ymm3, %ymm2, %k0 {%k1}
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v4i64:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtq %ymm1, %ymm0, %k1
; AVX512BW-NEXT:    vpcmpgtq %ymm3, %ymm2, %k0 {%k1}
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %gt_ab = icmp sgt <4 x i64> %a, %b
  %gt_cd = icmp sgt <4 x i64> %c, %d
  %both = and <4 x i1> %gt_ab, %gt_cd
  %bits = bitcast <4 x i1> %both to i4
  ret i4 %bits
}

; (a > b) & (c > d) with ordered fcmp on <4 x double> lanes, result bitcast to i4.
define i4 @v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x double> %d) {
; SSE2-SSSE3-LABEL: v4f64:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    cmpltpd %xmm1, %xmm3
; SSE2-SSSE3-NEXT:    cmpltpd %xmm0, %xmm2
; SSE2-SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
; SSE2-SSSE3-NEXT:    cmpltpd %xmm5, %xmm7
; SSE2-SSSE3-NEXT:    cmpltpd %xmm4, %xmm6
; SSE2-SSSE3-NEXT:    shufps {{.*#+}} xmm6 = xmm6[0,2],xmm7[0,2]
; SSE2-SSSE3-NEXT:    andps %xmm2, %xmm6
; SSE2-SSSE3-NEXT:    movmskps %xmm6, %eax
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: v4f64:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vcmpltpd %ymm2, %ymm3, %ymm2
; AVX12-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
; AVX12-NEXT:    vandpd %ymm2, %ymm0, %ymm0
; AVX12-NEXT:    vmovmskpd %ymm0, %eax
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    vzeroupper
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: v4f64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vcmpltpd %ymm0, %ymm1, %k1
; AVX512F-NEXT:    vcmpltpd %ymm2, %ymm3, %k0 {%k1}
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v4f64:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vcmpltpd %ymm0, %ymm1, %k1
; AVX512BW-NEXT:    vcmpltpd %ymm2, %ymm3, %k0 {%k1}
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %gt_ab = fcmp ogt <4 x double> %a, %b
  %gt_cd = fcmp ogt <4 x double> %c, %d
  %both = and <4 x i1> %gt_ab, %gt_cd
  %bits = bitcast <4 x i1> %both to i4
  ret i4 %bits
}

; (a > b) & (c > d) on signed <16 x i16> lanes, result bitcast to i16.
define i16 @v16i16(<16 x i16> %a, <16 x i16> %b, <16 x i16> %c, <16 x i16> %d) {
; SSE2-SSSE3-LABEL: v16i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    pcmpgtw %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpgtw %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    pcmpgtw %xmm6, %xmm4
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm4
; SSE2-SSSE3-NEXT:    pcmpgtw %xmm7, %xmm5
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm5
; SSE2-SSSE3-NEXT:    packsswb %xmm5, %xmm4
; SSE2-SSSE3-NEXT:    pmovmskb %xmm4, %eax
; SSE2-SSSE3-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm4
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtw %xmm3, %xmm2, %xmm1
; AVX1-NEXT:    vpand %xmm1, %xmm4, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
; AVX1-NEXT:    vpcmpgtw %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpgtw %ymm3, %ymm2, %ymm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovmskb %xmm0, %eax
; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: v16i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    vpcmpgtw %ymm3, %ymm2, %ymm1
; AVX512F-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v16i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtw %ymm1, %ymm0, %k1
; AVX512BW-NEXT:    vpcmpgtw %ymm3, %ymm2, %k0 {%k1}
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %gt_ab = icmp sgt <16 x i16> %a, %b
  %gt_cd = icmp sgt <16 x i16> %c, %d
  %both = and <16 x i1> %gt_ab, %gt_cd
  %bits = bitcast <16 x i1> %both to i16
  ret i16 %bits
}

; (a > b) & (c > d) on signed <8 x i32> lanes, result bitcast to i8.
define i8 @v8i32_and(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d) {
; SSE2-SSSE3-LABEL: v8i32_and:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm6, %xmm4
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm4
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm7, %xmm5
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm5
; SSE2-SSSE3-NEXT:    packssdw %xmm5, %xmm4
; SSE2-SSSE3-NEXT:    packsswb %xmm4, %xmm4
; SSE2-SSSE3-NEXT:    pmovmskb %xmm4, %eax
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: v8i32_and:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
; AVX1-NEXT:    vpcmpgtd %xmm4, %xmm5, %xmm4
; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm2, %ymm2
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vmovmskps %ymm0, %eax
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: v8i32_and:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtd %ymm3, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vmovmskps %ymm0, %eax
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: v8i32_and:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpgtd %ymm1, %ymm0, %k1
; AVX512F-NEXT:    vpcmpgtd %ymm3, %ymm2, %k0 {%k1}
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v8i32_and:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtd %ymm1, %ymm0, %k1
; AVX512BW-NEXT:    vpcmpgtd %ymm3, %ymm2, %k0 {%k1}
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %gt_ab = icmp sgt <8 x i32> %a, %b
  %gt_cd = icmp sgt <8 x i32> %c, %d
  %both = and <8 x i1> %gt_ab, %gt_cd
  %bits = bitcast <8 x i1> %both to i8
  ret i8 %bits
}

; We should see through any bitwise logic op.

; (a > b) | (c > d) on signed <8 x i32> lanes, result bitcast to i8.
define i8 @v8i32_or(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d) {
; SSE2-SSSE3-LABEL: v8i32_or:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm6, %xmm4
; SSE2-SSSE3-NEXT:    por %xmm0, %xmm4
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm7, %xmm5
; SSE2-SSSE3-NEXT:    por %xmm1, %xmm5
; SSE2-SSSE3-NEXT:    packssdw %xmm5, %xmm4
; SSE2-SSSE3-NEXT:    packsswb %xmm4, %xmm4
; SSE2-SSSE3-NEXT:    pmovmskb %xmm4, %eax
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: v8i32_or:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
; AVX1-NEXT:    vpcmpgtd %xmm4, %xmm5, %xmm4
; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm2, %ymm2
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    vorps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vmovmskps %ymm0, %eax
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: v8i32_or:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtd %ymm3, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpor %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vmovmskps %ymm0, %eax
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: v8i32_or:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpgtd %ymm1, %ymm0, %k0
; AVX512F-NEXT:    vpcmpgtd %ymm3, %ymm2, %k1
; AVX512F-NEXT:    korw %k1, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v8i32_or:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtd %ymm1, %ymm0, %k0
; AVX512BW-NEXT:    vpcmpgtd %ymm3, %ymm2, %k1
; AVX512BW-NEXT:    korw %k1, %k0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %gt_ab = icmp sgt <8 x i32> %a, %b
  %gt_cd = icmp sgt <8 x i32> %c, %d
  %either = or <8 x i1> %gt_ab, %gt_cd
  %bits = bitcast <8 x i1> %either to i8
  ret i8 %bits
}

; We should see through multiple bitwise logic ops.

; ((a > b) | (c < d)) & (e == f) on <8 x i32> lanes, result bitcast to i8.
define i8 @v8i32_or_and(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d, <8 x i32> %e, <8 x i32> %f) {
; SSE2-SSSE3-LABEL: v8i32_or_and:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm8
; SSE2-SSSE3-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm9
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm5, %xmm7
; SSE2-SSSE3-NEXT:    por %xmm1, %xmm7
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm4, %xmm6
; SSE2-SSSE3-NEXT:    por %xmm0, %xmm6
; SSE2-SSSE3-NEXT:    pcmpeqd {{[0-9]+}}(%rsp), %xmm9
; SSE2-SSSE3-NEXT:    pand %xmm6, %xmm9
; SSE2-SSSE3-NEXT:    pcmpeqd {{[0-9]+}}(%rsp), %xmm8
; SSE2-SSSE3-NEXT:    pand %xmm7, %xmm8
; SSE2-SSSE3-NEXT:    packssdw %xmm8, %xmm9
; SSE2-SSSE3-NEXT:    packsswb %xmm9, %xmm9
; SSE2-SSSE3-NEXT:    pmovmskb %xmm9, %eax
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: v8i32_or_and:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm6
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm7
; AVX1-NEXT:    vpcmpgtd %xmm6, %xmm7, %xmm6
; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm2, %ymm2
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm6
; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm6, %xmm3
; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    vorps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm4, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm4, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vmovmskps %ymm0, %eax
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: v8i32_or_and:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtd %ymm2, %ymm3, %ymm2
; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpor %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqd %ymm5, %ymm4, %ymm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vmovmskps %ymm0, %eax
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: v8i32_or_and:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpgtd %ymm1, %ymm0, %k0
; AVX512F-NEXT:    vpcmpgtd %ymm2, %ymm3, %k1
; AVX512F-NEXT:    korw %k1, %k0, %k1
; AVX512F-NEXT:    vpcmpeqd %ymm5, %ymm4, %k0 {%k1}
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v8i32_or_and:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtd %ymm1, %ymm0, %k0
; AVX512BW-NEXT:    vpcmpgtd %ymm2, %ymm3, %k1
; AVX512BW-NEXT:    korw %k1, %k0, %k1
; AVX512BW-NEXT:    vpcmpeqd %ymm5, %ymm4, %k0 {%k1}
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %gt_ab = icmp sgt <8 x i32> %a, %b
  %lt_cd = icmp slt <8 x i32> %c, %d
  %eq_ef = icmp eq <8 x i32> %e, %f
  %either = or <8 x i1> %gt_ab, %lt_cd
  %masked = and <8 x i1> %either, %eq_ef
  %bits = bitcast <8 x i1> %masked to i8
  ret i8 %bits
}

; PR61104 - peek through vselect allones operand
; combineBitcastvxi1 may be called before the fold vselect(c,-1,x) --> or(c,x) vXi1 fold

; select(a0 == a1, all-true, a2 < 0) on <8 x i32> lanes, result bitcast to i8.
define i8 @v8i32_or_vselect(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2) {
; SSE2-SSSE3-LABEL: v8i32_or_vselect:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    por %xmm5, %xmm1
; SSE2-SSSE3-NEXT:    por %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    packssdw %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    packsswb %xmm0, %xmm0
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: v8i32_or_vselect:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    vorps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vmovmskps %ymm0, %eax
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: v8i32_or_vselect:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpor %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vmovmskps %ymm0, %eax
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: v8i32_or_vselect:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0
; AVX512F-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; AVX512F-NEXT:    vpcmpgtd %ymm2, %ymm0, %k1
; AVX512F-NEXT:    korw %k1, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v8i32_or_vselect:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0
; AVX512BW-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; AVX512BW-NEXT:    vpcmpgtd %ymm2, %ymm0, %k1
; AVX512BW-NEXT:    korw %k1, %k0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %eq01 = icmp eq <8 x i32> %a0, %a1
  %neg2 = icmp slt <8 x i32> %a2, zeroinitializer
  %merged = select <8 x i1> %eq01, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i1> %neg2
  %bits = bitcast <8 x i1> %merged to i8
  ret i8 %bits
}

; (a3 < 0) | (a0 == a1) | select(%a4, a0 == a2, all-false), with a scalar i1
; select condition, on <8 x i32> lanes; result bitcast to i8.
define i8 @v8i32_or_select(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2, <8 x i32> %a3, i1 %a4) {
; SSE2-SSSE3-LABEL: v8i32_or_select:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm3
; SSE2-SSSE3-NEXT:    testb $1, %dil
; SSE2-SSSE3-NEXT:    jne .LBB7_1
; SSE2-SSSE3-NEXT:  # %bb.2:
; SSE2-SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSE2-SSSE3-NEXT:    jmp .LBB7_3
; SSE2-SSSE3-NEXT:  .LBB7_1:
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm5, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    packssdw %xmm1, %xmm0
; SSE2-SSSE3-NEXT:  .LBB7_3:
; SSE2-SSSE3-NEXT:    por %xmm3, %xmm7
; SSE2-SSSE3-NEXT:    por %xmm2, %xmm6
; SSE2-SSSE3-NEXT:    packssdw %xmm7, %xmm6
; SSE2-SSSE3-NEXT:    por %xmm0, %xmm6
; SSE2-SSSE3-NEXT:    packsswb %xmm6, %xmm6
; SSE2-SSSE3-NEXT:    pmovmskb %xmm6, %eax
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: v8i32_or_select:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT:    testb $1, %dil
; AVX1-NEXT:    jne .LBB7_1
; AVX1-NEXT:  # %bb.2:
; AVX1-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    jmp .LBB7_3
; AVX1-NEXT:  .LBB7_1:
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm4, %xmm5
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm2, %ymm2
; AVX1-NEXT:  .LBB7_3:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
; AVX1-NEXT:    vorps %ymm0, %ymm3, %ymm0
; AVX1-NEXT:    vorps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vmovmskps %ymm0, %eax
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: v8i32_or_select:
; AVX2:       # %bb.0:
; AVX2-NEXT:    testb $1, %dil
; AVX2-NEXT:    jne .LBB7_1
; AVX2-NEXT:  # %bb.2:
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    jmp .LBB7_3
; AVX2-NEXT:  .LBB7_1:
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm0, %ymm2
; AVX2-NEXT:  .LBB7_3:
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpor %ymm0, %ymm3, %ymm0
; AVX2-NEXT:    vpor %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vmovmskps %ymm0, %eax
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: v8i32_or_select:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0
; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT:    vpcmpgtd %ymm3, %ymm1, %k1
; AVX512F-NEXT:    testb $1, %dil
; AVX512F-NEXT:    jne .LBB7_1
; AVX512F-NEXT:  # %bb.2:
; AVX512F-NEXT:    kxorw %k0, %k0, %k2
; AVX512F-NEXT:    jmp .LBB7_3
; AVX512F-NEXT:  .LBB7_1:
; AVX512F-NEXT:    vpcmpeqd %ymm2, %ymm0, %k2
; AVX512F-NEXT:  .LBB7_3:
; AVX512F-NEXT:    korw %k0, %k1, %k0
; AVX512F-NEXT:    korw %k2, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v8i32_or_select:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0
; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT:    vpcmpgtd %ymm3, %ymm1, %k1
; AVX512BW-NEXT:    testb $1, %dil
; AVX512BW-NEXT:    jne .LBB7_1
; AVX512BW-NEXT:  # %bb.2:
; AVX512BW-NEXT:    kxorw %k0, %k0, %k2
; AVX512BW-NEXT:    jmp .LBB7_3
; AVX512BW-NEXT:  .LBB7_1:
; AVX512BW-NEXT:    vpcmpeqd %ymm2, %ymm0, %k2
; AVX512BW-NEXT:  .LBB7_3:
; AVX512BW-NEXT:    korw %k0, %k1, %k0
; AVX512BW-NEXT:    korw %k2, %k0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %eq01 = icmp eq <8 x i32> %a0, %a1
  %eq02 = icmp eq <8 x i32> %a0, %a2
  %neg3 = icmp slt <8 x i32> %a3, zeroinitializer
  %gated = select i1 %a4, <8 x i1> %eq02, <8 x i1> zeroinitializer
  %any01 = or <8 x i1> %neg3, %eq01
  %any = or <8 x i1> %any01, %gated
  %bits = bitcast <8 x i1> %any to i8
  ret i8 %bits
}

; (a > b) & (c > d) with ordered fcmp on <8 x float> lanes, result bitcast to i8.
define i8 @v8f32_and(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d) {
; SSE2-SSSE3-LABEL: v8f32_and:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    cmpltps %xmm0, %xmm2
; SSE2-SSSE3-NEXT:    cmpltps %xmm1, %xmm3
; SSE2-SSSE3-NEXT:    cmpltps %xmm4, %xmm6
; SSE2-SSSE3-NEXT:    andps %xmm2, %xmm6
; SSE2-SSSE3-NEXT:    cmpltps %xmm5, %xmm7
; SSE2-SSSE3-NEXT:    andps %xmm3, %xmm7
; SSE2-SSSE3-NEXT:    packssdw %xmm7, %xmm6
; SSE2-SSSE3-NEXT:    packsswb %xmm6, %xmm6
; SSE2-SSSE3-NEXT:    pmovmskb %xmm6, %eax
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: v8f32_and:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vcmpltps %ymm2, %ymm3, %ymm2
; AVX12-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
; AVX12-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX12-NEXT:    vmovmskps %ymm0, %eax
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    vzeroupper
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: v8f32_and:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vcmpltps %ymm0, %ymm1, %k1
; AVX512F-NEXT:    vcmpltps %ymm2, %ymm3, %k0 {%k1}
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v8f32_and:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vcmpltps %ymm0, %ymm1, %k1
; AVX512BW-NEXT:    vcmpltps %ymm2, %ymm3, %k0 {%k1}
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %gt_ab = fcmp ogt <8 x float> %a, %b
  %gt_cd = fcmp ogt <8 x float> %c, %d
  %both = and <8 x i1> %gt_ab, %gt_cd
  %bits = bitcast <8 x i1> %both to i8
  ret i8 %bits
}

; We should see through any bitwise logic op.

; (a > b) ^ (c > d) with ordered fcmp on <8 x float> lanes, result bitcast to i8.
define i8 @v8f32_xor(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d) {
; SSE2-SSSE3-LABEL: v8f32_xor:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    cmpltps %xmm0, %xmm2
; SSE2-SSSE3-NEXT:    cmpltps %xmm1, %xmm3
; SSE2-SSSE3-NEXT:    cmpltps %xmm4, %xmm6
; SSE2-SSSE3-NEXT:    xorps %xmm2, %xmm6
; SSE2-SSSE3-NEXT:    cmpltps %xmm5, %xmm7
; SSE2-SSSE3-NEXT:    xorps %xmm3, %xmm7
; SSE2-SSSE3-NEXT:    packssdw %xmm7, %xmm6
; SSE2-SSSE3-NEXT:    packsswb %xmm6, %xmm6
; SSE2-SSSE3-NEXT:    pmovmskb %xmm6, %eax
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: v8f32_xor:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vcmpltps %ymm2, %ymm3, %ymm2
; AVX12-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
; AVX12-NEXT:    vxorps %ymm2, %ymm0, %ymm0
; AVX12-NEXT:    vmovmskps %ymm0, %eax
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    vzeroupper
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: v8f32_xor:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vcmpltps %ymm0, %ymm1, %k0
; AVX512F-NEXT:    vcmpltps %ymm2, %ymm3, %k1
; AVX512F-NEXT:    kxorw %k1, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v8f32_xor:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vcmpltps %ymm0, %ymm1, %k0
; AVX512BW-NEXT:    vcmpltps %ymm2, %ymm3, %k1
; AVX512BW-NEXT:    kxorw %k1, %k0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %gt_ab = fcmp ogt <8 x float> %a, %b
  %gt_cd = fcmp ogt <8 x float> %c, %d
  %differ = xor <8 x i1> %gt_ab, %gt_cd
  %bits = bitcast <8 x i1> %differ to i8
  ret i8 %bits
}

; We should see through multiple bitwise logic ops.

; ((a ugt b) ^ (c ueq d)) & (e ogt f) on <8 x float> lanes, result bitcast to i8.
define i8 @v8f32_xor_and(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d, <8 x float> %e, <8 x float> %f) {
; SSE2-SSSE3-LABEL: v8f32_xor_and:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm9
; SSE2-SSSE3-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm8
; SSE2-SSSE3-NEXT:    cmpnleps %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    cmpnleps %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    movaps %xmm5, %xmm2
; SSE2-SSSE3-NEXT:    cmpeqps %xmm7, %xmm2
; SSE2-SSSE3-NEXT:    cmpunordps %xmm7, %xmm5
; SSE2-SSSE3-NEXT:    orps %xmm2, %xmm5
; SSE2-SSSE3-NEXT:    xorps %xmm1, %xmm5
; SSE2-SSSE3-NEXT:    movaps %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    cmpeqps %xmm6, %xmm1
; SSE2-SSSE3-NEXT:    cmpunordps %xmm6, %xmm4
; SSE2-SSSE3-NEXT:    orps %xmm1, %xmm4
; SSE2-SSSE3-NEXT:    xorps %xmm0, %xmm4
; SSE2-SSSE3-NEXT:    cmpltps {{[0-9]+}}(%rsp), %xmm8
; SSE2-SSSE3-NEXT:    andps %xmm4, %xmm8
; SSE2-SSSE3-NEXT:    cmpltps {{[0-9]+}}(%rsp), %xmm9
; SSE2-SSSE3-NEXT:    andps %xmm5, %xmm9
; SSE2-SSSE3-NEXT:    packssdw %xmm9, %xmm8
; SSE2-SSSE3-NEXT:    packsswb %xmm8, %xmm8
; SSE2-SSSE3-NEXT:    pmovmskb %xmm8, %eax
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: v8f32_xor_and:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vcmpeq_uqps %ymm3, %ymm2, %ymm2
; AVX12-NEXT:    vcmpnleps %ymm1, %ymm0, %ymm0
; AVX12-NEXT:    vxorps %ymm2, %ymm0, %ymm0
; AVX12-NEXT:    vcmpltps %ymm4, %ymm5, %ymm1
; AVX12-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX12-NEXT:    vmovmskps %ymm0, %eax
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    vzeroupper
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: v8f32_xor_and:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vcmpnleps %ymm1, %ymm0, %k0
; AVX512F-NEXT:    vcmpeq_uqps %ymm3, %ymm2, %k1
; AVX512F-NEXT:    kxorw %k1, %k0, %k1
; AVX512F-NEXT:    vcmpltps %ymm4, %ymm5, %k0 {%k1}
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v8f32_xor_and:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vcmpnleps %ymm1, %ymm0, %k0
; AVX512BW-NEXT:    vcmpeq_uqps %ymm3, %ymm2, %k1
; AVX512BW-NEXT:    kxorw %k1, %k0, %k1
; AVX512BW-NEXT:    vcmpltps %ymm4, %ymm5, %k0 {%k1}
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %ugt_ab = fcmp ugt <8 x float> %a, %b
  %ueq_cd = fcmp ueq <8 x float> %c, %d
  %ogt_ef = fcmp ogt <8 x float> %e, %f
  %differ = xor <8 x i1> %ugt_ab, %ueq_cd
  %masked = and <8 x i1> %differ, %ogt_ef
  %bits = bitcast <8 x i1> %masked to i8
  ret i8 %bits
}

; (a > b) & (c > d) on signed <32 x i8> lanes, result bitcast to i32.
define i32 @v32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8> %c, <32 x i8> %d) {
; SSE2-SSSE3-LABEL: v32i8:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    pcmpgtb %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpgtb %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    pcmpgtb %xmm6, %xmm4
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm4
; SSE2-SSSE3-NEXT:    pcmpgtb %xmm7, %xmm5
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm5
; SSE2-SSSE3-NEXT:    pmovmskb %xmm4, %ecx
; SSE2-SSSE3-NEXT:    pmovmskb %xmm5, %eax
; SSE2-SSSE3-NEXT:    shll $16, %eax
; SSE2-SSSE3-NEXT:    orl %ecx, %eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
; AVX1-NEXT:    vpcmpgtb %xmm4, %xmm5, %xmm4
; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm5, %xmm1
; AVX1-NEXT:    vpand %xmm1, %xmm4, %xmm1
; AVX1-NEXT:    vpcmpgtb %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
; AVX1-NEXT:    vpmovmskb %xmm1, %eax
; AVX1-NEXT:    shll $16, %eax
; AVX1-NEXT:    orl %ecx, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpgtb %ymm3, %ymm2, %ymm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpmovmskb %ymm0, %eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: v32i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    vpcmpgtb %ymm3, %ymm2, %ymm1
; AVX512F-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    vpmovmskb %ymm0, %eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v32i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtb %ymm1, %ymm0, %k1
; AVX512BW-NEXT:    vpcmpgtb %ymm3, %ymm2, %k0 {%k1}
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %gt_ab = icmp sgt <32 x i8> %a, %b
  %gt_cd = icmp sgt <32 x i8> %c, %d
  %both = and <32 x i1> %gt_ab, %gt_cd
  %bits = bitcast <32 x i1> %both to i32
  ret i32 %bits
}

; PR61683 - ignore upper undef elements
; (vec != 0) on <4 x i32>, widened to <8 x i1> with undef upper lanes, bitcast to i8.
define i8 @v4i32_concat_undef(<4 x i32> %vec) {
; SSE2-SSSE3-LABEL: v4i32_concat_undef:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    pxor %xmm1, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    movmskps %xmm1, %eax
; SSE2-SSSE3-NEXT:    xorl $15, %eax
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: v4i32_concat_undef:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX12-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX12-NEXT:    vmovmskps %xmm0, %eax
; AVX12-NEXT:    xorl $15, %eax
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: v4i32_concat_undef:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vptestmd %xmm0, %xmm0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v4i32_concat_undef:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vptestmd %xmm0, %xmm0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    retq
  %nonzero = icmp ne <4 x i32> %vec, zeroinitializer
  %widened = shufflevector <4 x i1> %nonzero, <4 x i1> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  %bits = bitcast <8 x i1> %widened to i8
  ret i8 %bits
}

; (vec == 0) on <2 x i64>, widened to <8 x i1> with undef upper lanes, bitcast to i8.
define i8 @v2i64_concat_undef(<2 x i64> %vec) {
; SSE2-SSSE3-LABEL: v2i64_concat_undef:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    pxor %xmm1, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    movmskpd %xmm0, %eax
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: v2i64_concat_undef:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX12-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX12-NEXT:    vmovmskpd %xmm0, %eax
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: v2i64_concat_undef:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vptestnmq %xmm0, %xmm0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v2i64_concat_undef:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vptestnmq %xmm0, %xmm0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    retq
  %iszero = icmp eq <2 x i64> %vec, zeroinitializer
  %widened = shufflevector <2 x i1> %iszero, <2 x i1> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bits = bitcast <8 x i1> %widened to i8
  ret i8 %bits
}

; (vec ogt 0.0) on <4 x double>, widened to <8 x i1> with undef upper lanes, bitcast to i8.
define i8 @v4f64_concat_undef(<4 x double> %vec) {
; SSE2-SSSE3-LABEL: v4f64_concat_undef:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    xorpd %xmm2, %xmm2
; SSE2-SSSE3-NEXT:    xorpd %xmm3, %xmm3
; SSE2-SSSE3-NEXT:    cmpltpd %xmm1, %xmm3
; SSE2-SSSE3-NEXT:    cmpltpd %xmm0, %xmm2
; SSE2-SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
; SSE2-SSSE3-NEXT:    movmskps %xmm2, %eax
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: v4f64_concat_undef:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX12-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
; AVX12-NEXT:    vmovmskpd %ymm0, %eax
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    vzeroupper
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: v4f64_concat_undef:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX512F-NEXT:    vcmpltpd %ymm0, %ymm1, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v4f64_concat_undef:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT:    vcmpltpd %ymm0, %ymm1, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %positive = fcmp ogt <4 x double> %vec, zeroinitializer
  %widened = shufflevector <4 x i1> %positive, <4 x i1> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  %bits = bitcast <8 x i1> %widened to i8
  ret i8 %bits
}