; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2

; This file tests DAG combines for the AVX VTESTPS intrinsics: folding an
; inverted (NOT'd) operand into the opposite flag test (testz <-> testc),
; so the explicit vector NOT is never materialized in the output below.

;
; testz(~X,Y) -> testc(X,Y)
;

define i32 @testpsz_128_invert0(<4 x float> %c, <4 x float> %d, i32 %a, i32 %b) {
; CHECK-LABEL: testpsz_128_invert0:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: vtestps %xmm1, %xmm0
; CHECK-NEXT: cmovael %esi, %eax
; CHECK-NEXT: retq
  %t0 = bitcast <4 x float> %c to <2 x i64>
  %t1 = xor <2 x i64> %t0, <i64 -1, i64 -1>
  %t2 = bitcast <2 x i64> %t1 to <4 x float>
  %t3 = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %t2, <4 x float> %d)
  %t4 = icmp ne i32 %t3, 0
  %t5 = select i1 %t4, i32 %a, i32 %b
  ret i32 %t5
}

define i32 @testpsz_256_invert0(<8 x float> %c, <8 x float> %d, i32 %a, i32 %b) {
; CHECK-LABEL: testpsz_256_invert0:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: vtestps %ymm1, %ymm0
; CHECK-NEXT: cmovael %esi, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %t0 = bitcast <8 x float> %c to <4 x i64>
  %t1 = xor <4 x i64> %t0, <i64 -1, i64 -1, i64 -1, i64 -1>
  %t2 = bitcast <4 x i64> %t1 to <8 x float>
  %t3 = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %t2, <8 x float> %d)
  %t4 = icmp ne i32 %t3, 0
  %t5 = select i1 %t4, i32 %a, i32 %b
  ret i32 %t5
}

;
; testz(X,~Y) -> testc(Y,X)
;

define i32 @testpsz_128_invert1(<4 x float> %c, <4 x float> %d, i32 %a, i32 %b) {
; CHECK-LABEL: testpsz_128_invert1:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: vtestps %xmm0, %xmm1
; CHECK-NEXT: cmovael %esi, %eax
; CHECK-NEXT: retq
  %t0 = bitcast <4 x float> %d to <2 x i64>
  %t1 = xor <2 x i64> %t0, <i64 -1, i64 -1>
  %t2 = bitcast <2 x i64> %t1 to <4 x float>
  %t3 = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %c, <4 x float> %t2)
  %t4 = icmp ne i32 %t3, 0
  %t5 = select i1 %t4, i32 %a, i32 %b
  ret i32 %t5
}

define i32 @testpsz_256_invert1(<8 x float> %c, <8 x float> %d, i32 %a, i32 %b) {
; CHECK-LABEL: testpsz_256_invert1:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: vtestps %ymm0, %ymm1
; CHECK-NEXT: cmovael %esi, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %t0 = bitcast <8 x float> %d to <4 x i64>
  %t1 = xor <4 x i64> %t0, <i64 -1, i64 -1, i64 -1, i64 -1>
  %t2 = bitcast <4 x i64> %t1 to <8 x float>
  %t3 = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %c, <8 x float> %t2)
  %t4 = icmp ne i32 %t3, 0
  %t5 = select i1 %t4, i32 %a, i32 %b
  ret i32 %t5
}

;
; testc(~X,Y) -> testz(X,Y)
;

define i32 @testpsc_128_invert0(<4 x float> %c, <4 x float> %d, i32 %a, i32 %b) {
; CHECK-LABEL: testpsc_128_invert0:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: vtestps %xmm1, %xmm0
; CHECK-NEXT: cmovnel %esi, %eax
; CHECK-NEXT: retq
  %t0 = bitcast <4 x float> %c to <2 x i64>
  %t1 = xor <2 x i64> %t0, <i64 -1, i64 -1>
  %t2 = bitcast <2 x i64> %t1 to <4 x float>
  %t3 = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %t2, <4 x float> %d)
  %t4 = icmp ne i32 %t3, 0
  %t5 = select i1 %t4, i32 %a, i32 %b
  ret i32 %t5
}

define i32 @testpsc_256_invert0(<8 x float> %c, <8 x float> %d, i32 %a, i32 %b) {
; CHECK-LABEL: testpsc_256_invert0:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: vtestps %ymm1, %ymm0
; CHECK-NEXT: cmovnel %esi, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %t0 = bitcast <8 x float> %c to <4 x i64>
  %t1 = xor <4 x i64> %t0, <i64 -1, i64 -1, i64 -1, i64 -1>
  %t2 = bitcast <4 x i64> %t1 to <8 x float>
  %t3 = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %t2, <8 x float> %d)
  %t4 = icmp ne i32 %t3, 0
  %t5 = select i1 %t4, i32 %a, i32 %b
  ret i32 %t5
}

;
; testnzc(~X,Y) -> testnzc(X,Y)
;
; (nzc is symmetric under inversion of either operand, since inverting an
; operand swaps the roles of ZF and CF.)

define i32 @testpsnzc_128_invert0(<4 x float> %c, <4 x float> %d, i32 %a, i32 %b) {
; CHECK-LABEL: testpsnzc_128_invert0:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: vtestps %xmm1, %xmm0
; CHECK-NEXT: cmovbel %esi, %eax
; CHECK-NEXT: retq
  %t0 = bitcast <4 x float> %c to <2 x i64>
  %t1 = xor <2 x i64> %t0, <i64 -1, i64 -1>
  %t2 = bitcast <2 x i64> %t1 to <4 x float>
  %t3 = call i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> %t2, <4 x float> %d)
  %t4 = icmp ne i32 %t3, 0
  %t5 = select i1 %t4, i32 %a, i32 %b
  ret i32 %t5
}

define i32 @testpsnzc_256_invert0(<8 x float> %c, <8 x float> %d, i32 %a, i32 %b) {
; CHECK-LABEL: testpsnzc_256_invert0:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: vtestps %ymm1, %ymm0
; CHECK-NEXT: cmovbel %esi, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %t0 = bitcast <8 x float> %c to <4 x i64>
  %t1 = xor <4 x i64> %t0, <i64 -1, i64 -1, i64 -1, i64 -1>
  %t2 = bitcast <4 x i64> %t1 to <8 x float>
  %t3 = call i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> %t2, <8 x float> %d)
  %t4 = icmp ne i32 %t3, 0
  %t5 = select i1 %t4, i32 %a, i32 %b
  ret i32 %t5
}

;
; SimplifyDemandedBits - only the sign bit is required
;

define i32 @testpsz_128_signbit(<4 x float> %c, <4 x float> %d, i32 %a, i32 %b) {
; CHECK-LABEL: testpsz_128_signbit:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: vtestps %xmm1, %xmm0
; CHECK-NEXT: cmovnel %esi, %eax
; CHECK-NEXT: retq
  %t0 = bitcast <4 x float> %c to <4 x i32>
  %t1 = ashr <4 x i32> %t0, <i32 31, i32 31, i32 31, i32 31>
  %t2 = bitcast <4 x i32> %t1 to <4 x float>
  %t3 = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %t2, <4 x float> %d)
  %t4 = icmp ne i32 %t3, 0
  %t5 = select i1 %t4, i32 %a, i32 %b
  ret i32 %t5
}

define i32 @testpsnzc_256_signbit(<8 x float> %c, <8 x float> %d, i32 %a, i32 %b) {
; CHECK-LABEL: testpsnzc_256_signbit:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: vtestps %ymm1, %ymm0
; CHECK-NEXT: cmovnel %esi, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %t0 = bitcast <8 x float> %c to <8 x i32>
  %t1 = icmp sgt <8 x i32> zeroinitializer, %t0
  %t2 = sext <8 x i1> %t1 to <8 x i32>
  %t3 = bitcast <8 x i32> %t2 to <8 x float>
  %t4 = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %t3, <8 x float> %d)
  %t5 = icmp ne i32 %t4, 0
  %t6 = select i1 %t5, i32 %a, i32 %b
  ret i32 %t6
}

define i32 @testpsc_256_signbit_multiuse(<8 x float> %c, i32 %a, i32 %b) {
; CHECK-LABEL: testpsc_256_signbit_multiuse:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: vtestps %ymm0, %ymm0
; CHECK-NEXT: cmovnel %esi, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %t0 = bitcast <8 x float> %c to <8 x i32>
  %t1 = ashr <8 x i32> %t0, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  %t2 = bitcast <8 x i32> %t1 to <8 x float>
  %t3 = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %t2, <8 x float> %t2)
  %t4 = icmp ne i32 %t3, 0
  %t5 = select i1 %t4, i32 %a, i32 %b
  ret i32 %t5
}

define i1 @PR62171(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: PR62171:
; CHECK: # %bb.0:
; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vtestps %ymm0, %ymm0
; CHECK-NEXT: sete %al
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %cmp = fcmp oeq <8 x float> %a0, %a1
  %sext = sext <8 x i1> %cmp to <8 x i32>
  %extract = shufflevector <8 x i32> %sext, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %extract1 = shufflevector <8 x i32> %sext, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %or = or <4 x i32> %extract, %extract1
  %or1 = bitcast <4 x i32> %or to <16 x i8>
  %msk = icmp slt <16 x i8> %or1, zeroinitializer
  %msk1 = bitcast <16 x i1> %msk to i16
  %not = icmp eq i16 %msk1, 0
  ret i1 %not
}

define void @combine_testp_v8f32(<8 x i32> %x){
; AVX-LABEL: combine_testp_v8f32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; AVX-NEXT: vtestps %ymm1, %ymm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX2-LABEL: combine_testp_v8f32:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vtestps %ymm1, %ymm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
entry:
  %xor.i.i.i.i.i.i.i.i.i = xor <8 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  %.cast.i.i.i.i.i.i = bitcast <8 x i32> %xor.i.i.i.i.i.i.i.i.i to <8 x float>
  %0 = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %.cast.i.i.i.i.i.i, <8 x float> %.cast.i.i.i.i.i.i)
  %cmp.i.not.i.i.i.i.i.i = icmp eq i32 %0, 0
  br i1 %cmp.i.not.i.i.i.i.i.i, label %if.end3.i.i.i.i.i.i, label %end

if.end3.i.i.i.i.i.i: ; preds = %entry
  ret void

end: ; preds = %entry
  ret void
}

; NOTE(review): no RUN line requests an SSE check prefix, so none of the
; "; SSE..." check blocks below are actually exercised by FileCheck; they
; appear to have been copied from a ptest test. Consider regenerating with
; update_llc_test_checks.py (which would drop them) or adding an SSE RUN line.

define i32 @PR88958_1(ptr %0, <4 x float> %1) {
; SSE-LABEL: PR88958_1:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax
; SSE-NEXT: ptest (%rdi), %xmm0
; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
; CHECK-LABEL: PR88958_1:
; CHECK: # %bb.0:
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: vtestps (%rdi), %xmm0
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
  %3 = load <4 x float>, ptr %0
  %4 = tail call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %3, <4 x float> %1)
  ret i32 %4
}

define i32 @PR88958_2(ptr %0, <4 x float> %1) {
; SSE-LABEL: PR88958_2:
; SSE: # %bb.0:
; SSE-NEXT: movdqa (%rdi), %xmm1
; SSE-NEXT: xorl %eax, %eax
; SSE-NEXT: ptest %xmm0, %xmm1
; SSE-NEXT: setb %al
; SSE-NEXT: retq
;
; CHECK-LABEL: PR88958_2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps (%rdi), %xmm1
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: vtestps %xmm0, %xmm1
; CHECK-NEXT: setb %al
; CHECK-NEXT: retq
  %3 = load <4 x float>, ptr %0
  %4 = tail call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %3, <4 x float> %1)
  ret i32 %4
}

define i32 @PR88958_3(ptr %0, <8 x float> %1) {
; FIX: the stale SSE label below said "PR88958_1:" — corrected to match
; this function's name.
; SSE-LABEL: PR88958_3:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax
; SSE-NEXT: ptest (%rdi), %xmm0
; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
; CHECK-LABEL: PR88958_3:
; CHECK: # %bb.0:
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: vtestps (%rdi), %ymm0
; CHECK-NEXT: sete %al
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %3 = load <8 x float>, ptr %0
  %4 = tail call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %3, <8 x float> %1)
  ret i32 %4
}

define i32 @PR88958_4(ptr %0, <8 x float> %1) {
; FIX: the stale SSE label below said "PR88958_2:" — corrected to match
; this function's name.
; SSE-LABEL: PR88958_4:
; SSE: # %bb.0:
; SSE-NEXT: movdqa (%rdi), %xmm1
; SSE-NEXT: xorl %eax, %eax
; SSE-NEXT: ptest %xmm0, %xmm1
; SSE-NEXT: setb %al
; SSE-NEXT: retq
;
; CHECK-LABEL: PR88958_4:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps (%rdi), %ymm1
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: vtestps %ymm0, %ymm1
; CHECK-NEXT: setb %al
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %3 = load <8 x float>, ptr %0
  %4 = tail call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %3, <8 x float> %1)
  ret i32 %4
}

declare i32 @llvm.x86.avx.vtestz.ps(<4 x float>, <4 x float>) nounwind readnone
declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone
declare i32 @llvm.x86.avx.vtestnzc.ps(<4 x float>, <4 x float>) nounwind readnone

declare i32 @llvm.x86.avx.vtestz.ps.256(<8 x float>, <8 x float>) nounwind readnone
declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readnone
declare i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float>, <8 x float>) nounwind readnone