; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+prefer-movmsk-over-vtest | FileCheck %s --check-prefixes=ADL
; The ADL prefix covers +prefer-movmsk-over-vtest targets, where the checks
; expect v(p)movmsk* + test/cmp sequences instead of the vtestps/vtestpd
; folds seen under the plain AVX prefixes.

declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>)
declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>)
declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>)

; Use widest possible vector for movmsk comparisons (PR37087)
; "noneof" tests compare the mask against 0, "allof" tests compare it
; against the all-lanes-set value for the movmsk's element count.

define i1 @movmskps_noneof_bitcast_v2f64(<2 x double> %a0) {
; SSE-LABEL: movmskps_noneof_bitcast_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    xorpd %xmm1, %xmm1
; SSE-NEXT:    cmpeqpd %xmm0, %xmm1
; SSE-NEXT:    movmskpd %xmm1, %eax
; SSE-NEXT:    testl %eax, %eax
; SSE-NEXT:    sete %al
; SSE-NEXT:    retq
;
; AVX-LABEL: movmskps_noneof_bitcast_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vcmpeqpd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vtestpd %xmm0, %xmm0
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
;
; ADL-LABEL: movmskps_noneof_bitcast_v2f64:
; ADL:       # %bb.0:
; ADL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; ADL-NEXT:    vcmpeqpd %xmm0, %xmm1, %xmm0
; ADL-NEXT:    vmovmskpd %xmm0, %eax
; ADL-NEXT:    testl %eax, %eax
; ADL-NEXT:    sete %al
; ADL-NEXT:    retq
  %1 = fcmp oeq <2 x double> zeroinitializer, %a0
  %2 = sext <2 x i1> %1 to <2 x i64>
  %3 = bitcast <2 x i64> %2 to <4 x float>
  %4 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %3)
  %5 = icmp eq i32 %4, 0
  ret i1 %5
}

define i1 @movmskps_allof_bitcast_v2f64(<2 x double> %a0) {
; SSE-LABEL: movmskps_allof_bitcast_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    xorpd %xmm1, %xmm1
; SSE-NEXT:    cmpeqpd %xmm0, %xmm1
; SSE-NEXT:    movmskpd %xmm1, %eax
; SSE-NEXT:    cmpl $3, %eax
; SSE-NEXT:    sete %al
; SSE-NEXT:    retq
;
; AVX-LABEL: movmskps_allof_bitcast_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vcmpeqpd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vtestpd %xmm1, %xmm0
; AVX-NEXT:    setb %al
; AVX-NEXT:    retq
;
; ADL-LABEL: movmskps_allof_bitcast_v2f64:
; ADL:       # %bb.0:
; ADL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; ADL-NEXT:    vcmpeqpd %xmm0, %xmm1, %xmm0
; ADL-NEXT:    vmovmskpd %xmm0, %eax
; ADL-NEXT:    cmpl $3, %eax
; ADL-NEXT:    sete %al
; ADL-NEXT:    retq
  %1 = fcmp oeq <2 x double> zeroinitializer, %a0
  %2 = sext <2 x i1> %1 to <2 x i64>
  %3 = bitcast <2 x i64> %2 to <4 x float>
  %4 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %3)
  %5 = icmp eq i32 %4, 15
  ret i1 %5
}

define i1 @pmovmskb_noneof_bitcast_v2i64(<2 x i64> %a0) {
; SSE2-LABEL: pmovmskb_noneof_bitcast_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    movmskps %xmm0, %eax
; SSE2-NEXT:    testl %eax, %eax
; SSE2-NEXT:    sete %al
; SSE2-NEXT:    retq
;
; SSE42-LABEL: pmovmskb_noneof_bitcast_v2i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movmskpd %xmm0, %eax
; SSE42-NEXT:    testl %eax, %eax
; SSE42-NEXT:    sete %al
; SSE42-NEXT:    retq
;
; AVX-LABEL: pmovmskb_noneof_bitcast_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vtestpd %xmm0, %xmm0
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
;
; ADL-LABEL: pmovmskb_noneof_bitcast_v2i64:
; ADL:       # %bb.0:
; ADL-NEXT:    vmovmskpd %xmm0, %eax
; ADL-NEXT:    testl %eax, %eax
; ADL-NEXT:    sete %al
; ADL-NEXT:    retq
  %1 = icmp sgt <2 x i64> zeroinitializer, %a0
  %2 = sext <2 x i1> %1 to <2 x i64>
  %3 = bitcast <2 x i64> %2 to <16 x i8>
  %4 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %3)
  %5 = icmp eq i32 %4, 0
  ret i1 %5
}

define i1 @pmovmskb_allof_bitcast_v2i64(<2 x i64> %a0) {
; SSE2-LABEL: pmovmskb_allof_bitcast_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    movmskps %xmm0, %eax
; SSE2-NEXT:    cmpl $15, %eax
; SSE2-NEXT:    sete %al
; SSE2-NEXT:    retq
;
; SSE42-LABEL: pmovmskb_allof_bitcast_v2i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movmskpd %xmm0, %eax
; SSE42-NEXT:    cmpl $3, %eax
; SSE42-NEXT:    sete %al
; SSE42-NEXT:    retq
;
; AVX-LABEL: pmovmskb_allof_bitcast_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vtestpd %xmm1, %xmm0
; AVX-NEXT:    setb %al
; AVX-NEXT:    retq
;
; ADL-LABEL: pmovmskb_allof_bitcast_v2i64:
; ADL:       # %bb.0:
; ADL-NEXT:    vmovmskpd %xmm0, %eax
; ADL-NEXT:    cmpl $3, %eax
; ADL-NEXT:    sete %al
; ADL-NEXT:    retq
  %1 = icmp sgt <2 x i64> zeroinitializer, %a0
  %2 = sext <2 x i1> %1 to <2 x i64>
  %3 = bitcast <2 x i64> %2 to <16 x i8>
  %4 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %3)
  %5 = icmp eq i32 %4, 65535
  ret i1 %5
}

define i1 @pmovmskb_noneof_bitcast_v4f32(<4 x float> %a0) {
; SSE-LABEL: pmovmskb_noneof_bitcast_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    cmpeqps %xmm0, %xmm1
; SSE-NEXT:    movmskps %xmm1, %eax
; SSE-NEXT:    testl %eax, %eax
; SSE-NEXT:    sete %al
; SSE-NEXT:    retq
;
; AVX-LABEL: pmovmskb_noneof_bitcast_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vtestps %xmm0, %xmm0
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
;
; ADL-LABEL: pmovmskb_noneof_bitcast_v4f32:
; ADL:       # %bb.0:
; ADL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; ADL-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm0
; ADL-NEXT:    vmovmskps %xmm0, %eax
; ADL-NEXT:    testl %eax, %eax
; ADL-NEXT:    sete %al
; ADL-NEXT:    retq
  %1 = fcmp oeq <4 x float> %a0, zeroinitializer
  %2 = sext <4 x i1> %1 to <4 x i32>
  %3 = bitcast <4 x i32> %2 to <16 x i8>
  %4 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %3)
  %5 = icmp eq i32 %4, 0
  ret i1 %5
}

define i1 @pmovmskb_allof_bitcast_v4f32(<4 x float> %a0) {
; SSE-LABEL: pmovmskb_allof_bitcast_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    cmpeqps %xmm0, %xmm1
; SSE-NEXT:    movmskps %xmm1, %eax
; SSE-NEXT:    cmpl $15, %eax
; SSE-NEXT:    sete %al
; SSE-NEXT:    retq
;
; AVX-LABEL: pmovmskb_allof_bitcast_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vtestps %xmm1, %xmm0
; AVX-NEXT:    setb %al
; AVX-NEXT:    retq
;
; ADL-LABEL: pmovmskb_allof_bitcast_v4f32:
; ADL:       # %bb.0:
; ADL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; ADL-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm0
; ADL-NEXT:    vmovmskps %xmm0, %eax
; ADL-NEXT:    cmpl $15, %eax
; ADL-NEXT:    sete %al
; ADL-NEXT:    retq
  %1 = fcmp oeq <4 x float> %a0, zeroinitializer
  %2 = sext <4 x i1> %1 to <4 x i32>
  %3 = bitcast <4 x i32> %2 to <16 x i8>
  %4 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %3)
  %5 = icmp eq i32 %4, 65535
  ret i1 %5
}

; MOVMSK(ICMP_SGT(X,-1)) -> NOT(MOVMSK(X)))
define i1 @movmskps_allof_v4i32_positive(<4 x i32> %a0) {
; SSE-LABEL: movmskps_allof_v4i32_positive:
; SSE:       # %bb.0:
; SSE-NEXT:    movmskps %xmm0, %eax
; SSE-NEXT:    xorl $15, %eax
; SSE-NEXT:    cmpl $15, %eax
; SSE-NEXT:    sete %al
; SSE-NEXT:    retq
;
; AVX-LABEL: movmskps_allof_v4i32_positive:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovmskps %xmm0, %eax
; AVX-NEXT:    xorl $15, %eax
; AVX-NEXT:    cmpl $15, %eax
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
;
; ADL-LABEL: movmskps_allof_v4i32_positive:
; ADL:       # %bb.0:
; ADL-NEXT:    vmovmskps %xmm0, %eax
; ADL-NEXT:    xorl $15, %eax
; ADL-NEXT:    cmpl $15, %eax
; ADL-NEXT:    sete %al
; ADL-NEXT:    retq
  %1 = icmp sgt <4 x i32> %a0, <i32 -1, i32 -1, i32 -1, i32 -1>
  %2 = sext <4 x i1> %1 to <4 x i32>
  %3 = bitcast <4 x i32> %2 to <4 x float>
  %4 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %3)
  %5 = icmp eq i32 %4, 15
  ret i1 %5
}

define i1 @pmovmskb_noneof_v16i8_positive(<16 x i8> %a0) {
; SSE-LABEL: pmovmskb_noneof_v16i8_positive:
; SSE:       # %bb.0:
; SSE-NEXT:    pmovmskb %xmm0, %eax
; SSE-NEXT:    xorl $65535, %eax # imm = 0xFFFF
; SSE-NEXT:    sete %al
; SSE-NEXT:    retq
;
; AVX-LABEL: pmovmskb_noneof_v16i8_positive:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovmskb %xmm0, %eax
; AVX-NEXT:    xorl $65535, %eax # imm = 0xFFFF
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
;
; ADL-LABEL: pmovmskb_noneof_v16i8_positive:
; ADL:       # %bb.0:
; ADL-NEXT:    vpmovmskb %xmm0, %eax
; ADL-NEXT:    xorl $65535, %eax # imm = 0xFFFF
; ADL-NEXT:    sete %al
; ADL-NEXT:    retq
  %1 = icmp sgt <16 x i8> %a0, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %2 = sext <16 x i1> %1 to <16 x i8>
  %3 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %2)
  %4 = icmp eq i32 %3, 0
  ret i1 %4
}

; MOVMSK(CMPEQ(AND(X,C1),0)) -> MOVMSK(NOT(SHL(X,C2)))
define i32 @movmskpd_pow2_mask(<2 x i64> %a0) {
; SSE2-LABEL: movmskpd_pow2_mask:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    movmskpd %xmm0, %eax
; SSE2-NEXT:    retq
;
; SSE42-LABEL: movmskpd_pow2_mask:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movmskpd %xmm0, %eax
; SSE42-NEXT:    xorl $3, %eax
; SSE42-NEXT:    retq
;
; AVX-LABEL: movmskpd_pow2_mask:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovmskpd %xmm0, %eax
; AVX-NEXT:    xorl $3, %eax
; AVX-NEXT:    retq
;
; ADL-LABEL: movmskpd_pow2_mask:
; ADL:       # %bb.0:
; ADL-NEXT:    vmovmskpd %xmm0, %eax
; ADL-NEXT:    xorl $3, %eax
; ADL-NEXT:    retq
  %1 = and <2 x i64> %a0, <i64 -9223372036854775808, i64 -9223372036854775808>
  %2 = icmp eq <2 x i64> %1, zeroinitializer
  %3 = sext <2 x i1> %2 to <2 x i64>
  %4 = bitcast <2 x i64> %3 to <2 x double>
  %5 = tail call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %4)
  ret i32 %5
}

define i32 @movmskps_pow2_mask(<4 x i32> %a0) {
; SSE-LABEL: movmskps_pow2_mask:
; SSE:       # %bb.0:
; SSE-NEXT:    pslld $29, %xmm0
; SSE-NEXT:    movmskps %xmm0, %eax
; SSE-NEXT:    xorl $15, %eax
; SSE-NEXT:    retq
;
; AVX-LABEL: movmskps_pow2_mask:
; AVX:       # %bb.0:
; AVX-NEXT:    vpslld $29, %xmm0, %xmm0
; AVX-NEXT:    vmovmskps %xmm0, %eax
; AVX-NEXT:    xorl $15, %eax
; AVX-NEXT:    retq
;
; ADL-LABEL: movmskps_pow2_mask:
; ADL:       # %bb.0:
; ADL-NEXT:    vpslld $29, %xmm0, %xmm0
; ADL-NEXT:    vmovmskps %xmm0, %eax
; ADL-NEXT:    xorl $15, %eax
; ADL-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 4, i32 4, i32 4, i32 4>
  %2 = icmp eq <4 x i32> %1, zeroinitializer
  %3 = sext <4 x i1> %2 to <4 x i32>
  %4 = bitcast <4 x i32> %3 to <4 x float>
  %5 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %4)
  ret i32 %5
}

define i32 @pmovmskb_pow2_mask(<16 x i8> %a0) {
; SSE-LABEL: pmovmskb_pow2_mask:
; SSE:       # %bb.0:
; SSE-NEXT:    psllw $7, %xmm0
; SSE-NEXT:    pmovmskb %xmm0, %eax
; SSE-NEXT:    xorl $65535, %eax # imm = 0xFFFF
; SSE-NEXT:    retq
;
; AVX-LABEL: pmovmskb_pow2_mask:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsllw $7, %xmm0, %xmm0
; AVX-NEXT:    vpmovmskb %xmm0, %eax
; AVX-NEXT:    xorl $65535, %eax # imm = 0xFFFF
; AVX-NEXT:    retq
;
; ADL-LABEL: pmovmskb_pow2_mask:
; ADL:       # %bb.0:
; ADL-NEXT:    vpsllw $7, %xmm0, %xmm0
; ADL-NEXT:    vpmovmskb %xmm0, %eax
; ADL-NEXT:    xorl $65535, %eax # imm = 0xFFFF
; ADL-NEXT:    retq
  %1 = and <16 x i8> %a0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %2 = icmp eq <16 x i8> %1, zeroinitializer
  %3 = sext <16 x i1> %2 to <16 x i8>
  %4 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %3)
  ret i32 %4
}

; AND(MOVMSK(X),MOVMSK(Y)) -> MOVMSK(AND(X,Y))
; XOR(MOVMSK(X),MOVMSK(Y)) -> MOVMSK(XOR(X,Y))
; OR(MOVMSK(X),MOVMSK(Y)) -> MOVMSK(OR(X,Y))
; if the elements are the same width.

define i32 @and_movmskpd_movmskpd(<2 x double> %a0, <2 x i64> %a1) {
; SSE-LABEL: and_movmskpd_movmskpd:
; SSE:       # %bb.0:
; SSE-NEXT:    xorpd %xmm2, %xmm2
; SSE-NEXT:    cmpeqpd %xmm0, %xmm2
; SSE-NEXT:    andpd %xmm1, %xmm2
; SSE-NEXT:    movmskpd %xmm2, %eax
; SSE-NEXT:    retq
;
; AVX-LABEL: and_movmskpd_movmskpd:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vcmpeqpd %xmm0, %xmm2, %xmm0
; AVX-NEXT:    vandpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovmskpd %xmm0, %eax
; AVX-NEXT:    retq
;
; ADL-LABEL: and_movmskpd_movmskpd:
; ADL:       # %bb.0:
; ADL-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; ADL-NEXT:    vcmpeqpd %xmm0, %xmm2, %xmm0
; ADL-NEXT:    vandpd %xmm1, %xmm0, %xmm0
; ADL-NEXT:    vmovmskpd %xmm0, %eax
; ADL-NEXT:    retq
  %1 = fcmp oeq <2 x double> zeroinitializer, %a0
  %2 = sext <2 x i1> %1 to <2 x i64>
  %3 = bitcast <2 x i64> %2 to <2 x double>
  %4 = tail call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %3)
  %5 = icmp sgt <2 x i64> zeroinitializer, %a1
  %6 = bitcast <2 x i1> %5 to i2
  %7 = zext i2 %6 to i32
  %8 = and i32 %4, %7
  ret i32 %8
}

define i32 @xor_movmskps_movmskps(<4 x float> %a0, <4 x i32> %a1) {
; SSE-LABEL: xor_movmskps_movmskps:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm2, %xmm2
; SSE-NEXT:    cmpeqps %xmm0, %xmm2
; SSE-NEXT:    xorps %xmm1, %xmm2
; SSE-NEXT:    movmskps %xmm2, %eax
; SSE-NEXT:    retq
;
; AVX-LABEL: xor_movmskps_movmskps:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vcmpeqps %xmm0, %xmm2, %xmm0
; AVX-NEXT:    vxorps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovmskps %xmm0, %eax
; AVX-NEXT:    retq
;
; ADL-LABEL: xor_movmskps_movmskps:
; ADL:       # %bb.0:
; ADL-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; ADL-NEXT:    vcmpeqps %xmm0, %xmm2, %xmm0
; ADL-NEXT:    vxorps %xmm1, %xmm0, %xmm0
; ADL-NEXT:    vmovmskps %xmm0, %eax
; ADL-NEXT:    retq
  %1 = fcmp oeq <4 x float> zeroinitializer, %a0
  %2 = sext <4 x i1> %1 to <4 x i32>
  %3 = bitcast <4 x i32> %2 to <4 x float>
  %4 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %3)
  %5 = ashr <4 x i32> %a1, <i32 31, i32 31, i32 31, i32 31>
  %6 = bitcast <4 x i32> %5 to <4 x float>
  %7 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %6)
  %8 = xor i32 %4, %7
  ret i32 %8
}

define i32 @or_pmovmskb_pmovmskb(<16 x i8> %a0, <8 x i16> %a1) {
; SSE-LABEL: or_pmovmskb_pmovmskb:
; SSE:       # %bb.0:
; SSE-NEXT:    pxor %xmm2, %xmm2
; SSE-NEXT:    pcmpeqb %xmm0, %xmm2
; SSE-NEXT:    psraw $15, %xmm1
; SSE-NEXT:    por %xmm2, %xmm1
; SSE-NEXT:    pmovmskb %xmm1, %eax
; SSE-NEXT:    retq
;
; AVX-LABEL: or_pmovmskb_pmovmskb:
; AVX:       # %bb.0:
; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vpsraw $15, %xmm1, %xmm1
; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpmovmskb %xmm0, %eax
; AVX-NEXT:    retq
;
; ADL-LABEL: or_pmovmskb_pmovmskb:
; ADL:       # %bb.0:
; ADL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; ADL-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
; ADL-NEXT:    vpsraw $15, %xmm1, %xmm1
; ADL-NEXT:    vpor %xmm1, %xmm0, %xmm0
; ADL-NEXT:    vpmovmskb %xmm0, %eax
; ADL-NEXT:    retq
  %1 = icmp eq <16 x i8> zeroinitializer, %a0
  %2 = sext <16 x i1> %1 to <16 x i8>
  %3 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %2)
  %4 = ashr <8 x i16> %a1, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %5 = bitcast <8 x i16> %4 to <16 x i8>
  %6 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %5)
  %7 = or i32 %3, %6
  ret i32 %7
}

; We can't fold to ptest if we're not checking every pcmpeq result
define i32 @movmskps_ptest_numelts_mismatch(<16 x i8> %a0) {
; SSE-LABEL: movmskps_ptest_numelts_mismatch:
; SSE:       # %bb.0:
; SSE-NEXT:    pxor %xmm1, %xmm1
; SSE-NEXT:    pcmpeqb %xmm0, %xmm1
; SSE-NEXT:    movmskps %xmm1, %ecx
; SSE-NEXT:    xorl %eax, %eax
; SSE-NEXT:    cmpl $15, %ecx
; SSE-NEXT:    sete %al
; SSE-NEXT:    negl %eax
; SSE-NEXT:    retq
;
; AVX-LABEL: movmskps_ptest_numelts_mismatch:
; AVX:       # %bb.0:
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    vtestps %xmm1, %xmm0
; AVX-NEXT:    sbbl %eax, %eax
; AVX-NEXT:    retq
;
; ADL-LABEL: movmskps_ptest_numelts_mismatch:
; ADL:       # %bb.0:
; ADL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; ADL-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; ADL-NEXT:    vmovmskps %xmm0, %ecx
; ADL-NEXT:    xorl %eax, %eax
; ADL-NEXT:    cmpl $15, %ecx
; ADL-NEXT:    sete %al
; ADL-NEXT:    negl %eax
; ADL-NEXT:    retq
  %1 = icmp eq <16 x i8> %a0, zeroinitializer
  %2 = sext <16 x i1> %1 to <16 x i8>
  %3 = bitcast <16 x i8> %2 to <4 x float>
  %4 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %3)
  %5 = icmp eq i32 %4, 15
  %6 = sext i1 %5 to i32
  ret i32 %6
}