; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2
; NOTE(review): several PR88958_* tests below carry SSE-prefixed check blocks,
; but no RUN line uses -check-prefix=SSE, so those checks are never exercised.

;
; testz(~X,Y) -> testc(X,Y)
;

define i32 @testpdz_128_invert0(<2 x double> %c, <2 x double> %d, i32 %a, i32 %b) {
; CHECK-LABEL: testpdz_128_invert0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    vtestpd %xmm1, %xmm0
; CHECK-NEXT:    cmovael %esi, %eax
; CHECK-NEXT:    retq
  %t0 = bitcast <2 x double> %c to <2 x i64>
  %t1 = xor <2 x i64> %t0, <i64 -1, i64 -1>
  %t2 = bitcast <2 x i64> %t1 to <2 x double>
  %t3 = call i32 @llvm.x86.avx.vtestz.pd(<2 x double> %t2, <2 x double> %d)
  %t4 = icmp ne i32 %t3, 0
  %t5 = select i1 %t4, i32 %a, i32 %b
  ret i32 %t5
}

define i32 @testpdz_256_invert0(<4 x double> %c, <4 x double> %d, i32 %a, i32 %b) {
; CHECK-LABEL: testpdz_256_invert0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    vtestpd %ymm1, %ymm0
; CHECK-NEXT:    cmovael %esi, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %t0 = bitcast <4 x double> %c to <4 x i64>
  %t1 = xor <4 x i64> %t0, <i64 -1, i64 -1, i64 -1, i64 -1>
  %t2 = bitcast <4 x i64> %t1 to <4 x double>
  %t3 = call i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %t2, <4 x double> %d)
  %t4 = icmp ne i32 %t3, 0
  %t5 = select i1 %t4, i32 %a, i32 %b
  ret i32 %t5
}

;
; testz(X,~Y) -> testc(Y,X)
;

define i32 @testpdz_128_invert1(<2 x double> %c, <2 x double> %d, i32 %a, i32 %b) {
; CHECK-LABEL: testpdz_128_invert1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    vtestpd %xmm0, %xmm1
; CHECK-NEXT:    cmovael %esi, %eax
; CHECK-NEXT:    retq
  %t0 = bitcast <2 x double> %d to <2 x i64>
  %t1 = xor <2 x i64> %t0, <i64 -1, i64 -1>
  %t2 = bitcast <2 x i64> %t1 to <2 x double>
  %t3 = call i32 @llvm.x86.avx.vtestz.pd(<2 x double> %c, <2 x double> %t2)
  %t4 = icmp ne i32 %t3, 0
  %t5 = select i1 %t4, i32 %a, i32 %b
  ret i32 %t5
}

define i32 @testpdz_256_invert1(<4 x double> %c, <4 x double> %d, i32 %a, i32 %b) {
; CHECK-LABEL: testpdz_256_invert1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    vtestpd %ymm0, %ymm1
; CHECK-NEXT:    cmovael %esi, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %t0 = bitcast <4 x double> %d to <4 x i64>
  %t1 = xor <4 x i64> %t0, <i64 -1, i64 -1, i64 -1, i64 -1>
  %t2 = bitcast <4 x i64> %t1 to <4 x double>
  %t3 = call i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %c, <4 x double> %t2)
  %t4 = icmp ne i32 %t3, 0
  %t5 = select i1 %t4, i32 %a, i32 %b
  ret i32 %t5
}

;
; testc(~X,Y) -> testz(X,Y)
;

define i32 @testpdc_128_invert0(<2 x double> %c, <2 x double> %d, i32 %a, i32 %b) {
; CHECK-LABEL: testpdc_128_invert0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    vtestpd %xmm1, %xmm0
; CHECK-NEXT:    cmovnel %esi, %eax
; CHECK-NEXT:    retq
  %t0 = bitcast <2 x double> %c to <2 x i64>
  %t1 = xor <2 x i64> %t0, <i64 -1, i64 -1>
  %t2 = bitcast <2 x i64> %t1 to <2 x double>
  %t3 = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %t2, <2 x double> %d)
  %t4 = icmp ne i32 %t3, 0
  %t5 = select i1 %t4, i32 %a, i32 %b
  ret i32 %t5
}

define i32 @testpdc_256_invert0(<4 x double> %c, <4 x double> %d, i32 %a, i32 %b) {
; CHECK-LABEL: testpdc_256_invert0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    vtestpd %ymm1, %ymm0
; CHECK-NEXT:    cmovnel %esi, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %t0 = bitcast <4 x double> %c to <4 x i64>
  %t1 = xor <4 x i64> %t0, <i64 -1, i64 -1, i64 -1, i64 -1>
  %t2 = bitcast <4 x i64> %t1 to <4 x double>
  %t3 = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %t2, <4 x double> %d)
  %t4 = icmp ne i32 %t3, 0
  %t5 = select i1 %t4, i32 %a, i32 %b
  ret i32 %t5
}

;
; testnzc(~X,Y) -> testnzc(X,Y)
;

define i32 @testpdnzc_128_invert0(<2 x double> %c, <2 x double> %d, i32 %a, i32 %b) {
; CHECK-LABEL: testpdnzc_128_invert0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    vtestpd %xmm1, %xmm0
; CHECK-NEXT:    cmovbel %esi, %eax
; CHECK-NEXT:    retq
  %t0 = bitcast <2 x double> %c to <2 x i64>
  %t1 = xor <2 x i64> %t0, <i64 -1, i64 -1>
  %t2 = bitcast <2 x i64> %t1 to <2 x double>
  %t3 = call i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> %t2, <2 x double> %d)
  %t4 = icmp ne i32 %t3, 0
  %t5 = select i1 %t4, i32 %a, i32 %b
  ret i32 %t5
}

define i32 @testpdnzc_256_invert0(<4 x double> %c, <4 x double> %d, i32 %a, i32 %b) {
; CHECK-LABEL: testpdnzc_256_invert0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    vtestpd %ymm1, %ymm0
; CHECK-NEXT:    cmovbel %esi, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %t0 = bitcast <4 x double> %c to <4 x i64>
  %t1 = xor <4 x i64> %t0, <i64 -1, i64 -1, i64 -1, i64 -1>
  %t2 = bitcast <4 x i64> %t1 to <4 x double>
  %t3 = call i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> %t2, <4 x double> %d)
  %t4 = icmp ne i32 %t3, 0
  %t5 = select i1 %t4, i32 %a, i32 %b
  ret i32 %t5
}

;
; SimplifyDemandedBits - only the sign bit is required
;

define i32 @testpdc_128_signbit(<2 x double> %c, <2 x double> %d, i32 %a, i32 %b) {
; CHECK-LABEL: testpdc_128_signbit:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    vtestpd %xmm1, %xmm0
; CHECK-NEXT:    cmovael %esi, %eax
; CHECK-NEXT:    retq
  %t0 = bitcast <2 x double> %c to <2 x i64>
  %t1 = ashr <2 x i64> %t0, <i64 63, i64 63>
  %t2 = bitcast <2 x i64> %t1 to <2 x double>
  %t3 = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %t2, <2 x double> %d)
  %t4 = icmp ne i32 %t3, 0
  %t5 = select i1 %t4, i32 %a, i32 %b
  ret i32 %t5
}

define i32 @testpdz_256_signbit(<4 x double> %c, <4 x double> %d, i32 %a, i32 %b) {
; CHECK-LABEL: testpdz_256_signbit:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    vtestpd %ymm1, %ymm0
; CHECK-NEXT:    cmovnel %esi, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %t0 = bitcast <4 x double> %c to <4 x i64>
  %t1 = icmp sgt <4 x i64> zeroinitializer, %t0
  %t2 = sext <4 x i1> %t1 to <4 x i64>
  %t3 = bitcast <4 x i64> %t2 to <4 x double>
  %t4 = call i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %t3, <4 x double> %d)
  %t5 = icmp ne i32 %t4, 0
  %t6 = select i1 %t5, i32 %a, i32 %b
  ret i32 %t6
}

define i32 @testpdnzc_256_signbit_multiuse(<4 x double> %c, i32 %a, i32 %b) {
; CHECK-LABEL: testpdnzc_256_signbit_multiuse:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    vtestpd %ymm0, %ymm0
; CHECK-NEXT:    cmovnel %esi, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %t0 = bitcast <4 x double> %c to <4 x i64>
  %t1 = icmp sgt <4 x i64> zeroinitializer, %t0
  %t2 = sext <4 x i1> %t1 to <4 x i64>
  %t3 = bitcast <4 x i64> %t2 to <4 x double>
  %t4 = call i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %t3, <4 x double> %t3)
  %t5 = icmp ne i32 %t4, 0
  %t6 = select i1 %t5, i32 %a, i32 %b
  ret i32 %t6
}

define i1 @PR62171(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: PR62171:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcmpeqpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    vtestpd %ymm0, %ymm0
; CHECK-NEXT:    sete %al
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %cmp = fcmp oeq <4 x double> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i64>
  %extract = shufflevector <4 x i64> %sext, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
  %extract1 = shufflevector <4 x i64> %sext, <4 x i64> poison, <2 x i32> <i32 2, i32 3>
  %or = or <2 x i64> %extract, %extract1
  %or1 = bitcast <2 x i64> %or to <16 x i8>
  %msk = icmp slt <16 x i8> %or1, zeroinitializer
  %msk1 = bitcast <16 x i1> %msk to i16
  %not = icmp eq i16 %msk1, 0
  ret i1 %not
}

define void @combine_testp_v4f64(<4 x i64> %x){
; AVX-LABEL: combine_testp_v4f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
; AVX-NEXT:    vtestpd %ymm1, %ymm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX2-LABEL: combine_testp_v4f64:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vtestpd %ymm1, %ymm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
entry:
  %xor.i.i.i.i.i.i.i.i.i = xor <4 x i64> %x, <i64 -1, i64 -1, i64 -1, i64 -1>
  %.cast.i.i.i.i.i.i = bitcast <4 x i64> %xor.i.i.i.i.i.i.i.i.i to <4 x double>
  %0 = call i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %.cast.i.i.i.i.i.i, <4 x double> %.cast.i.i.i.i.i.i)
  %cmp.i.not.i.i.i.i.i.i = icmp eq i32 %0, 0
  br i1 %cmp.i.not.i.i.i.i.i.i, label %if.end3.i.i.i.i.i.i, label %end

if.end3.i.i.i.i.i.i:                              ; preds = %entry
  ret void

end:                                              ; preds = %entry
  ret void
}

define i32 @PR88958_1(ptr %0, <2 x double> %1) {
; SSE-LABEL: PR88958_1:
; SSE:       # %bb.0:
; SSE-NEXT:    xorl %eax, %eax
; SSE-NEXT:    ptest (%rdi), %xmm0
; SSE-NEXT:    sete %al
; SSE-NEXT:    retq
;
; CHECK-LABEL: PR88958_1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    vtestpd (%rdi), %xmm0
; CHECK-NEXT:    sete %al
; CHECK-NEXT:    retq
  %3 = load <2 x double>, ptr %0
  %4 = tail call i32 @llvm.x86.avx.vtestz.pd(<2 x double> %3, <2 x double> %1)
  ret i32 %4
}

define i32 @PR88958_2(ptr %0, <2 x double> %1) {
; SSE-LABEL: PR88958_2:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa (%rdi), %xmm1
; SSE-NEXT:    xorl %eax, %eax
; SSE-NEXT:    ptest %xmm0, %xmm1
; SSE-NEXT:    setb %al
; SSE-NEXT:    retq
;
; CHECK-LABEL: PR88958_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovapd (%rdi), %xmm1
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    vtestpd %xmm0, %xmm1
; CHECK-NEXT:    setb %al
; CHECK-NEXT:    retq
  %3 = load <2 x double>, ptr %0
  %4 = tail call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %3, <2 x double> %1)
  ret i32 %4
}

; NOTE(review): the SSE check block below was copy-pasted from PR88958_1 and was
; mislabeled "SSE-LABEL: PR88958_1"; the label is corrected here, but the block
; is still dead since no RUN line uses the SSE prefix.
define i32 @PR88958_3(ptr %0, <4 x double> %1) {
; SSE-LABEL: PR88958_3:
; SSE:       # %bb.0:
; SSE-NEXT:    xorl %eax, %eax
; SSE-NEXT:    ptest (%rdi), %xmm0
; SSE-NEXT:    sete %al
; SSE-NEXT:    retq
;
; CHECK-LABEL: PR88958_3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    vtestpd (%rdi), %ymm0
; CHECK-NEXT:    sete %al
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %3 = load <4 x double>, ptr %0
  %4 = tail call i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %3, <4 x double> %1)
  ret i32 %4
}

; NOTE(review): the SSE check block below was copy-pasted from PR88958_2 and was
; mislabeled "SSE-LABEL: PR88958_2"; the label is corrected here, but the block
; is still dead since no RUN line uses the SSE prefix.
define i32 @PR88958_4(ptr %0, <4 x double> %1) {
; SSE-LABEL: PR88958_4:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa (%rdi), %xmm1
; SSE-NEXT:    xorl %eax, %eax
; SSE-NEXT:    ptest %xmm0, %xmm1
; SSE-NEXT:    setb %al
; SSE-NEXT:    retq
;
; CHECK-LABEL: PR88958_4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovapd (%rdi), %ymm1
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    vtestpd %ymm0, %ymm1
; CHECK-NEXT:    setb %al
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %3 = load <4 x double>, ptr %0
  %4 = tail call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %3, <4 x double> %1)
  ret i32 %4
}

declare i32 @llvm.x86.avx.vtestz.pd(<2 x double>, <2 x double>) nounwind readnone
declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnone
declare i32 @llvm.x86.avx.vtestnzc.pd(<2 x double>, <2 x double>) nounwind readnone

declare i32 @llvm.x86.avx.vtestz.pd.256(<4 x double>, <4 x double>) nounwind readnone
declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind readnone
declare i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double>, <4 x double>) nounwind readnone