; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX2

;
; testz(~X,Y) -> testc(X,Y)
;

define i32 @ptestz_256_invert0(<4 x i64> %c, <4 x i64> %d, i32 %a, i32 %b) {
; CHECK-LABEL: ptestz_256_invert0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    vptest %ymm1, %ymm0
; CHECK-NEXT:    cmovael %esi, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %t1 = xor <4 x i64> %c, <i64 -1, i64 -1, i64 -1, i64 -1>
  %t2 = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %t1, <4 x i64> %d)
  %t3 = icmp ne i32 %t2, 0
  %t4 = select i1 %t3, i32 %a, i32 %b
  ret i32 %t4
}

;
; testz(X,~Y) -> testc(Y,X)
;

define i32 @ptestz_256_invert1(<4 x i64> %c, <4 x i64> %d, i32 %a, i32 %b) {
; CHECK-LABEL: ptestz_256_invert1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    vptest %ymm0, %ymm1
; CHECK-NEXT:    cmovael %esi, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %t1 = xor <4 x i64> %d, <i64 -1, i64 -1, i64 -1, i64 -1>
  %t2 = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %c, <4 x i64> %t1)
  %t3 = icmp ne i32 %t2, 0
  %t4 = select i1 %t3, i32 %a, i32 %b
  ret i32 %t4
}

;
; testc(~X,Y) -> testz(X,Y)
;

define i32 @ptestc_256_invert0(<4 x i64> %c, <4 x i64> %d, i32 %a, i32 %b) {
; CHECK-LABEL: ptestc_256_invert0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    vptest %ymm1, %ymm0
; CHECK-NEXT:    cmovnel %esi, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %t1 = xor <4 x i64> %c, <i64 -1, i64 -1, i64 -1, i64 -1>
  %t2 = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %t1, <4 x i64> %d)
  %t3 = icmp ne i32 %t2, 0
  %t4 = select i1 %t3, i32 %a, i32 %b
  ret i32 %t4
}

;
; testnzc(~X,Y) -> testnzc(X,Y)
;

define i32 @ptestnzc_256_invert0(<4 x i64> %c, <4 x i64> %d, i32 %a, i32 %b) {
; CHECK-LABEL: ptestnzc_256_invert0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    vptest %ymm1, %ymm0
; CHECK-NEXT:    cmovbel %esi, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %t1 = xor <4 x i64> %c, <i64 -1, i64 -1, i64 -1, i64 -1>
  %t2 = call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %t1, <4 x i64> %d)
  %t3 = icmp ne i32 %t2, 0
  %t4 = select i1 %t3, i32 %a, i32 %b
  ret i32 %t4
}

define i32 @ptestnzc_256_invert0_commute(<4 x i64> %c, <4 x i64> %d, i32 %a, i32 %b) {
; CHECK-LABEL: ptestnzc_256_invert0_commute:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    vptest %ymm1, %ymm0
; CHECK-NEXT:    cmoval %esi, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %t1 = xor <4 x i64> %c, <i64 -1, i64 -1, i64 -1, i64 -1>
  %t2 = call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %t1, <4 x i64> %d)
  %t3 = icmp eq i32 %t2, 0
  %t4 = select i1 %t3, i32 %a, i32 %b
  ret i32 %t4
}

;
; testc(X,~X) -> testc(X,-1)
;

define i32 @ptestc_256_not(<4 x i64> %c, <4 x i64> %d, i32 %a, i32 %b) {
; AVX1-LABEL: ptestc_256_not:
; AVX1:       # %bb.0:
; AVX1-NEXT:    movl %edi, %eax
; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vptest %ymm1, %ymm0
; AVX1-NEXT:    cmovael %esi, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ptestc_256_not:
; AVX2:       # %bb.0:
; AVX2-NEXT:    movl %edi, %eax
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vptest %ymm1, %ymm0
; AVX2-NEXT:    cmovael %esi, %eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
  %t1 = xor <4 x i64> %c, <i64 -1, i64 -1, i64 -1, i64 -1>
  %t2 = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %c, <4 x i64> %t1)
  %t3 = icmp ne i32 %t2, 0
  %t4 = select i1 %t3, i32 %a, i32 %b
  ret i32 %t4
}

;
; testz(AND(X,Y),AND(X,Y)) -> testz(X,Y)
;

define i32 @ptestz_256_and(<4 x i64> %c, <4 x i64> %d, i32 %a, i32 %b) {
; CHECK-LABEL: ptestz_256_and:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    vptest %ymm1, %ymm0
; CHECK-NEXT:    cmovel %esi, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %t1 = and <4 x i64> %c, %d
  %t2 = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %t1, <4 x i64> %t1)
  %t3 = icmp eq i32 %t2, 0
  %t4 = select i1 %t3, i32 %a, i32 %b
  ret i32 %t4
}

;
; testz(AND(~X,Y),AND(~X,Y)) -> testc(X,Y)
;

define i32 @ptestz_256_andc(<4 x i64> %c, <4 x i64> %d, i32 %a, i32 %b) {
; CHECK-LABEL: ptestz_256_andc:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    vptest %ymm1, %ymm0
; CHECK-NEXT:    cmovbl %esi, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %t1 = xor <4 x i64> %c, <i64 -1, i64 -1, i64 -1, i64 -1>
  %t2 = and <4 x i64> %t1, %d
  %t3 = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %t2, <4 x i64> %t2)
  %t4 = icmp eq i32 %t3, 0
  %t5 = select i1 %t4, i32 %a, i32 %b
  ret i32 %t5
}

;
; testz(-1,X) -> testz(X,X)
;

define i32 @ptestz_256_allones0(<4 x i64> %c, i32 %a, i32 %b) {
; CHECK-LABEL: ptestz_256_allones0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    vptest %ymm0, %ymm0
; CHECK-NEXT:    cmovnel %esi, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %t1 = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>, <4 x i64> %c)
  %t2 = icmp ne i32 %t1, 0
  %t3 = select i1 %t2, i32 %a, i32 %b
  ret i32 %t3
}

;
; testz(X,-1) -> testz(X,X)
;

define i32 @ptestz_256_allones1(<4 x i64> %c, i32 %a, i32 %b) {
; CHECK-LABEL: ptestz_256_allones1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    vptest %ymm0, %ymm0
; CHECK-NEXT:    cmovnel %esi, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %t1 = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %c, <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>)
  %t2 = icmp ne i32 %t1, 0
  %t3 = select i1 %t2, i32 %a, i32 %b
  ret i32 %t3
}

;
; testz(ashr(X,bw-1),-1) -> testpd/testps/pmovmskb(X)
;

define i32 @ptestz_v8i32_signbits(<8 x i32> %c, i32 %a, i32 %b) {
; AVX1-LABEL: ptestz_v8i32_signbits:
; AVX1:       # %bb.0:
; AVX1-NEXT:    movl %edi, %eax
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vptest %ymm0, %ymm0
; AVX1-NEXT:    cmovnel %esi, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ptestz_v8i32_signbits:
; AVX2:       # %bb.0:
; AVX2-NEXT:    movl %edi, %eax
; AVX2-NEXT:    vtestps %ymm0, %ymm0
; AVX2-NEXT:    cmovnel %esi, %eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
  %t1 = ashr <8 x i32> %c, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  %t2 = bitcast <8 x i32> %t1 to <4 x i64>
  %t3 = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %t2, <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>)
  %t4 = icmp ne i32 %t3, 0
  %t5 = select i1 %t4, i32 %a, i32 %b
  ret i32 %t5
}

define i32 @ptestz_v32i8_signbits(<32 x i8> %c, i32 %a, i32 %b) {
; AVX1-LABEL: ptestz_v32i8_signbits:
; AVX1:       # %bb.0:
; AVX1-NEXT:    movl %edi, %eax
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vptest %ymm0, %ymm0
; AVX1-NEXT:    cmovnel %esi, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ptestz_v32i8_signbits:
; AVX2:       # %bb.0:
; AVX2-NEXT:    movl %edi, %eax
; AVX2-NEXT:    vpmovmskb %ymm0, %ecx
; AVX2-NEXT:    testl %ecx, %ecx
; AVX2-NEXT:    cmovnel %esi, %eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
  %t1 = ashr <32 x i8> %c, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  %t2 = bitcast <32 x i8> %t1 to <4 x i64>
  %t3 = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %t2, <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>)
  %t4 = icmp ne i32 %t3, 0
  %t5 = select i1 %t4, i32 %a, i32 %b
  ret i32 %t5
}

;
; testz(or(extract_lo(X),extract_hi(X)),or(extract_lo(Y),extract_hi(Y))) -> testz(X,Y)
;

; FIXME: Foldable to ptest(xor(%0,%1),xor(%0,%1))
define i1 @PR38788(<16 x i16> %0, <16 x i16> %1) {
; AVX1-LABEL: PR38788:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vptest %ymm1, %ymm0
; AVX1-NEXT:    setae %al
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: PR38788:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vptest %ymm1, %ymm0
; AVX2-NEXT:    setae %al
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
  %3 = icmp eq <16 x i16> %0, %1
  %4 = sext <16 x i1> %3 to <16 x i16>
  %5 = bitcast <16 x i16> %4 to <4 x i64>
  %6 = tail call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %5, <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>)
  %7 = icmp eq i32 %6, 0
  ret i1 %7
}

declare i32 @llvm.x86.avx.ptestz.256(<4 x i64>, <4 x i64>)
declare i32 @llvm.x86.avx.ptestc.256(<4 x i64>, <4 x i64>)
declare i32 @llvm.x86.avx.ptestnzc.256(<4 x i64>, <4 x i64>)
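
; Reference: for ptest(A,B) as exposed by these intrinsics, the hardware sets
; ZF := ((A & B) == 0) and CF := ((~A & B) == 0); testz returns ZF, testc
; returns CF, and testnzc returns 1 only when both flags are clear. Every
; fold named in the comments above is an identity under these semantics.
;
; A minimal sketch of the first identity, testz(~X,Y) == testc(X,Y), as an
; illustrative function (hypothetical; not exercised by the RUN lines above
; and carrying no autogenerated assertions):

define i1 @ptestz_invert0_identity_sketch(<4 x i64> %x, <4 x i64> %y) {
  ; ~X via xor with all-ones.
  %notx = xor <4 x i64> %x, <i64 -1, i64 -1, i64 -1, i64 -1>
  ; testz(~X,Y): ZF = ((~X & Y) == 0).
  %zf = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %notx, <4 x i64> %y)
  ; testc(X,Y): CF = ((~X & Y) == 0).
  %cf = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %x, <4 x i64> %y)
  ; The two results should agree for all inputs.
  %same = icmp eq i32 %zf, %cf
  ret i1 %same
}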