; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512VL

; Each test loads a vector, extracts every lane, ANDs the lanes together in a
; balanced tree, and compares the scalar result against -1 (all bits set).
; The generated assertions verify the backend folds this scalarized reduction
; into a vector all-ones test (pcmpeq+movmsk, ptest, or vpcmpneqd+kortest),
; or into a single wide scalar compare when the whole vector fits in a GPR
; (e.g. the cmpq/cmpl/cmpw forms for the sub-128-bit cases below).
; Do not hand-edit the assertion comments; regenerate with the script above.

;
; vXi64
;

; <2 x i64>: single 128-bit all-ones test against the loaded value.
define i1 @test_v2i64(ptr %ptr) nounwind {
; SSE2-LABEL: test_v2i64:
; SSE2: # %bb.0:
; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
; SSE2-NEXT: pcmpeqd (%rdi), %xmm0
; SSE2-NEXT: movmskps %xmm0, %eax
; SSE2-NEXT: xorl $15, %eax
; SSE2-NEXT: sete %al
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v2i64:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa (%rdi), %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: ptest %xmm1, %xmm0
; SSE41-NEXT: setb %al
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v2i64:
; AVX: # %bb.0:
; AVX-NEXT: vmovdqa (%rdi), %xmm0
; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT: vptest %xmm1, %xmm0
; AVX-NEXT: setb %al
; AVX-NEXT: retq
  %vload = load <2 x i64>, ptr %ptr
  %v0 = extractelement <2 x i64> %vload, i32 0
  %v1 = extractelement <2 x i64> %vload, i32 1
  %vreduce = and i64 %v0, %v1
  %vcheck = icmp eq i64 %vreduce, -1
  ret i1 %vcheck
}

; <4 x i64>: halves are ANDed (pand/vptest on 256-bit) before the all-ones test.
define i1 @test_v4i64(ptr %ptr) nounwind {
; SSE2-LABEL: test_v4i64:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa (%rdi), %xmm0
; SSE2-NEXT: pand 16(%rdi), %xmm0
; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
; SSE2-NEXT: movmskps %xmm1, %eax
; SSE2-NEXT: xorl $15, %eax
; SSE2-NEXT: sete %al
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v4i64:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa (%rdi), %xmm0
; SSE41-NEXT: pand 16(%rdi), %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: ptest %xmm1, %xmm0
; SSE41-NEXT: setb %al
; SSE41-NEXT: retq
;
; AVX1-LABEL: test_v4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovdqa (%rdi), %ymm0
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT: vptest %ymm1, %ymm0
; AVX1-NEXT: setb %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa (%rdi), %ymm0
; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vptest %ymm1, %ymm0
; AVX2-NEXT: setb %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_v4i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovdqa (%rdi), %ymm0
; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512-NEXT: vptest %ymm1, %ymm0
; AVX512-NEXT: setb %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %vload = load <4 x i64>, ptr %ptr
  %v0 = extractelement <4 x i64> %vload, i32 0
  %v1 = extractelement <4 x i64> %vload, i32 1
  %v2 = extractelement <4 x i64> %vload, i32 2
  %v3 = extractelement <4 x i64> %vload, i32 3
  %vreduce01 = and i64 %v0, %v1
  %vreduce23 = and i64 %v2, %v3
  %vreduce = and i64 %vreduce01, %vreduce23
  %vcheck = icmp eq i64 %vreduce, -1
  ret i1 %vcheck
}

; <8 x i64>: 512 bits; AVX512 can test the whole load directly with
; vpcmpneqd against all-ones plus kortest.
define i1 @test_v8i64(ptr %ptr) nounwind {
; SSE2-LABEL: test_v8i64:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa (%rdi), %xmm0
; SSE2-NEXT: movdqa 16(%rdi), %xmm1
; SSE2-NEXT: pand 48(%rdi), %xmm1
; SSE2-NEXT: pand 32(%rdi), %xmm0
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
; SSE2-NEXT: movmskps %xmm1, %eax
; SSE2-NEXT: xorl $15, %eax
; SSE2-NEXT: sete %al
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v8i64:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa (%rdi), %xmm0
; SSE41-NEXT: movdqa 16(%rdi), %xmm1
; SSE41-NEXT: pand 48(%rdi), %xmm1
; SSE41-NEXT: pand 32(%rdi), %xmm0
; SSE41-NEXT: pand %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: ptest %xmm1, %xmm0
; SSE41-NEXT: setb %al
; SSE41-NEXT: retq
;
; AVX1-LABEL: test_v8i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovaps (%rdi), %ymm0
; AVX1-NEXT: vandps 32(%rdi), %ymm0, %ymm0
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT: vptest %ymm1, %ymm0
; AVX1-NEXT: setb %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v8i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa (%rdi), %ymm0
; AVX2-NEXT: vpand 32(%rdi), %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vptest %ymm1, %ymm0
; AVX2-NEXT: setb %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_v8i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpternlogd {{.*#+}} zmm0 = -1
; AVX512-NEXT: vpcmpneqd (%rdi), %zmm0, %k0
; AVX512-NEXT: kortestw %k0, %k0
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %vload = load <8 x i64>, ptr %ptr
  %v0 = extractelement <8 x i64> %vload, i32 0
  %v1 = extractelement <8 x i64> %vload, i32 1
  %v2 = extractelement <8 x i64> %vload, i32 2
  %v3 = extractelement <8 x i64> %vload, i32 3
  %v4 = extractelement <8 x i64> %vload, i32 4
  %v5 = extractelement <8 x i64> %vload, i32 5
  %v6 = extractelement <8 x i64> %vload, i32 6
  %v7 = extractelement <8 x i64> %vload, i32 7
  %vreduce01 = and i64 %v0, %v1
  %vreduce23 = and i64 %v2, %v3
  %vreduce45 = and i64 %v4, %v5
  %vreduce67 = and i64 %v6, %v7
  %vreduce0123 = and i64 %vreduce01, %vreduce23
  %vreduce4567 = and i64 %vreduce45, %vreduce67
  %vreduce = and i64 %vreduce0123, %vreduce4567
  %vcheck = icmp eq i64 %vreduce, -1
  ret i1 %vcheck
}

; <16 x i64>: 1024 bits; folded down with pand/vandps/vpandq trees before the
; final all-ones test.
define i1 @test_v16i64(ptr %ptr) nounwind {
; SSE2-LABEL: test_v16i64:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa (%rdi), %xmm0
; SSE2-NEXT: movdqa 16(%rdi), %xmm1
; SSE2-NEXT: movdqa 32(%rdi), %xmm2
; SSE2-NEXT: movdqa 48(%rdi), %xmm3
; SSE2-NEXT: pand 112(%rdi), %xmm3
; SSE2-NEXT: pand 80(%rdi), %xmm1
; SSE2-NEXT: pand %xmm3, %xmm1
; SSE2-NEXT: pand 96(%rdi), %xmm2
; SSE2-NEXT: pand 64(%rdi), %xmm0
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
; SSE2-NEXT: movmskps %xmm1, %eax
; SSE2-NEXT: xorl $15, %eax
; SSE2-NEXT: sete %al
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v16i64:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa (%rdi), %xmm0
; SSE41-NEXT: movdqa 16(%rdi), %xmm1
; SSE41-NEXT: movdqa 32(%rdi), %xmm2
; SSE41-NEXT: movdqa 48(%rdi), %xmm3
; SSE41-NEXT: pand 112(%rdi), %xmm3
; SSE41-NEXT: pand 80(%rdi), %xmm1
; SSE41-NEXT: pand %xmm3, %xmm1
; SSE41-NEXT: pand 96(%rdi), %xmm2
; SSE41-NEXT: pand 64(%rdi), %xmm0
; SSE41-NEXT: pand %xmm2, %xmm0
; SSE41-NEXT: pand %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: ptest %xmm1, %xmm0
; SSE41-NEXT: setb %al
; SSE41-NEXT: retq
;
; AVX1-LABEL: test_v16i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovaps (%rdi), %ymm0
; AVX1-NEXT: vmovaps 32(%rdi), %ymm1
; AVX1-NEXT: vandps 96(%rdi), %ymm1, %ymm1
; AVX1-NEXT: vandps 64(%rdi), %ymm0, %ymm0
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT: vptest %ymm1, %ymm0
; AVX1-NEXT: setb %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v16i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa (%rdi), %ymm0
; AVX2-NEXT: vmovdqa 32(%rdi), %ymm1
; AVX2-NEXT: vpand 96(%rdi), %ymm1, %ymm1
; AVX2-NEXT: vpand 64(%rdi), %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vptest %ymm1, %ymm0
; AVX2-NEXT: setb %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_v16i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0
; AVX512-NEXT: vpandq 64(%rdi), %zmm0, %zmm0
; AVX512-NEXT: vpternlogd {{.*#+}} zmm1 = -1
; AVX512-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
; AVX512-NEXT: kortestw %k0, %k0
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %vload = load <16 x i64>, ptr %ptr
  %v0 = extractelement <16 x i64> %vload, i32 0
  %v1 = extractelement <16 x i64> %vload, i32 1
  %v2 = extractelement <16 x i64> %vload, i32 2
  %v3 = extractelement <16 x i64> %vload, i32 3
  %v4 = extractelement <16 x i64> %vload, i32 4
  %v5 = extractelement <16 x i64> %vload, i32 5
  %v6 = extractelement <16 x i64> %vload, i32 6
  %v7 = extractelement <16 x i64> %vload, i32 7
  %v8 = extractelement <16 x i64> %vload, i32 8
  %v9 = extractelement <16 x i64> %vload, i32 9
  %v10 = extractelement <16 x i64> %vload, i32 10
  %v11 = extractelement <16 x i64> %vload, i32 11
  %v12 = extractelement <16 x i64> %vload, i32 12
  %v13 = extractelement <16 x i64> %vload, i32 13
  %v14 = extractelement <16 x i64> %vload, i32 14
  %v15 = extractelement <16 x i64> %vload, i32 15
  %vreduce01 = and i64 %v0, %v1
  %vreduce23 = and i64 %v2, %v3
  %vreduce45 = and i64 %v4, %v5
  %vreduce67 = and i64 %v6, %v7
  %vreduce89 = and i64 %v8, %v9
  %vreduce1011 = and i64 %v10, %v11
  %vreduce1213 = and i64 %v12, %v13
  %vreduce1415 = and i64 %v14, %v15
  %vreduce0123 = and i64 %vreduce01, %vreduce23
  %vreduce4567 = and i64 %vreduce45, %vreduce67
  %vreduce891011 = and i64 %vreduce89, %vreduce1011
  %vreduce12131415 = and i64 %vreduce1213, %vreduce1415
  %vreduce01234567 = and i64 %vreduce0123, %vreduce4567
  %vreduce89101112131415 = and i64 %vreduce891011, %vreduce12131415
  %vreduce = and i64 %vreduce01234567, %vreduce89101112131415
  %vcheck = icmp eq i64 %vreduce, -1
  ret i1 %vcheck
}

;
; vXi32
;

; <2 x i32> = 64 bits total: expect a single scalar cmpq against -1.
define i1 @test_v2i32(ptr %ptr) nounwind {
; SSE-LABEL: test_v2i32:
; SSE: # %bb.0:
; SSE-NEXT: cmpq $-1, (%rdi)
; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
; AVX-LABEL: test_v2i32:
; AVX: # %bb.0:
; AVX-NEXT: cmpq $-1, (%rdi)
; AVX-NEXT: sete %al
; AVX-NEXT: retq
  %vload = load <2 x i32>, ptr %ptr
  %v0 = extractelement <2 x i32> %vload, i32 0
  %v1 = extractelement <2 x i32> %vload, i32 1
  %vreduce = and i32 %v0, %v1
  %vcheck = icmp eq i32 %vreduce, -1
  ret i1 %vcheck
}

; <4 x i32>: single 128-bit all-ones test.
define i1 @test_v4i32(ptr %ptr) nounwind {
; SSE2-LABEL: test_v4i32:
; SSE2: # %bb.0:
; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
; SSE2-NEXT: pcmpeqd (%rdi), %xmm0
; SSE2-NEXT: movmskps %xmm0, %eax
; SSE2-NEXT: xorl $15, %eax
; SSE2-NEXT: sete %al
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v4i32:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa (%rdi), %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: ptest %xmm1, %xmm0
; SSE41-NEXT: setb %al
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v4i32:
; AVX: # %bb.0:
; AVX-NEXT: vmovdqa (%rdi), %xmm0
; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT: vptest %xmm1, %xmm0
; AVX-NEXT: setb %al
; AVX-NEXT: retq
  %vload = load <4 x i32>, ptr %ptr
  %v0 = extractelement <4 x i32> %vload, i32 0
  %v1 = extractelement <4 x i32> %vload, i32 1
  %v2 = extractelement <4 x i32> %vload, i32 2
  %v3 = extractelement <4 x i32> %vload, i32 3
  %vreduce01 = and i32 %v0, %v1
  %vreduce23 = and i32 %v2, %v3
  %vreduce = and i32 %vreduce01, %vreduce23
  %vcheck = icmp eq i32 %vreduce, -1
  ret i1 %vcheck
}

; <8 x i32>: 256 bits; same shapes as test_v4i64.
define i1 @test_v8i32(ptr %ptr) nounwind {
; SSE2-LABEL: test_v8i32:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa (%rdi), %xmm0
; SSE2-NEXT: pand 16(%rdi), %xmm0
; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
; SSE2-NEXT: movmskps %xmm1, %eax
; SSE2-NEXT: xorl $15, %eax
; SSE2-NEXT: sete %al
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v8i32:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa (%rdi), %xmm0
; SSE41-NEXT: pand 16(%rdi), %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: ptest %xmm1, %xmm0
; SSE41-NEXT: setb %al
; SSE41-NEXT: retq
;
; AVX1-LABEL: test_v8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovdqa (%rdi), %ymm0
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT: vptest %ymm1, %ymm0
; AVX1-NEXT: setb %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa (%rdi), %ymm0
; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vptest %ymm1, %ymm0
; AVX2-NEXT: setb %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_v8i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovdqa (%rdi), %ymm0
; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512-NEXT: vptest %ymm1, %ymm0
; AVX512-NEXT: setb %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %vload = load <8 x i32>, ptr %ptr
  %v0 = extractelement <8 x i32> %vload, i32 0
  %v1 = extractelement <8 x i32> %vload, i32 1
  %v2 = extractelement <8 x i32> %vload, i32 2
  %v3 = extractelement <8 x i32> %vload, i32 3
  %v4 = extractelement <8 x i32> %vload, i32 4
  %v5 = extractelement <8 x i32> %vload, i32 5
  %v6 = extractelement <8 x i32> %vload, i32 6
  %v7 = extractelement <8 x i32> %vload, i32 7
  %vreduce01 = and i32 %v0, %v1
  %vreduce23 = and i32 %v2, %v3
  %vreduce45 = and i32 %v4, %v5
  %vreduce67 = and i32 %v6, %v7
  %vreduce0123 = and i32 %vreduce01, %vreduce23
  %vreduce4567 = and i32 %vreduce45, %vreduce67
  %vreduce = and i32 %vreduce0123, %vreduce4567
  %vcheck = icmp eq i32 %vreduce, -1
  ret i1 %vcheck
}

; <16 x i32>: 512 bits; AVX512 tests the whole load with vpcmpneqd+kortest.
define i1 @test_v16i32(ptr %ptr) nounwind {
; SSE2-LABEL: test_v16i32:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa (%rdi), %xmm0
; SSE2-NEXT: movdqa 16(%rdi), %xmm1
; SSE2-NEXT: pand 48(%rdi), %xmm1
; SSE2-NEXT: pand 32(%rdi), %xmm0
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
; SSE2-NEXT: movmskps %xmm1, %eax
; SSE2-NEXT: xorl $15, %eax
; SSE2-NEXT: sete %al
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v16i32:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa (%rdi), %xmm0
; SSE41-NEXT: movdqa 16(%rdi), %xmm1
; SSE41-NEXT: pand 48(%rdi), %xmm1
; SSE41-NEXT: pand 32(%rdi), %xmm0
; SSE41-NEXT: pand %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: ptest %xmm1, %xmm0
; SSE41-NEXT: setb %al
; SSE41-NEXT: retq
;
; AVX1-LABEL: test_v16i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovaps (%rdi), %ymm0
; AVX1-NEXT: vandps 32(%rdi), %ymm0, %ymm0
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT: vptest %ymm1, %ymm0
; AVX1-NEXT: setb %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v16i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa (%rdi), %ymm0
; AVX2-NEXT: vpand 32(%rdi), %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vptest %ymm1, %ymm0
; AVX2-NEXT: setb %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_v16i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpternlogd {{.*#+}} zmm0 = -1
; AVX512-NEXT: vpcmpneqd (%rdi), %zmm0, %k0
; AVX512-NEXT: kortestw %k0, %k0
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %vload = load <16 x i32>, ptr %ptr
  %v0 = extractelement <16 x i32> %vload, i32 0
  %v1 = extractelement <16 x i32> %vload, i32 1
  %v2 = extractelement <16 x i32> %vload, i32 2
  %v3 = extractelement <16 x i32> %vload, i32 3
  %v4 = extractelement <16 x i32> %vload, i32 4
  %v5 = extractelement <16 x i32> %vload, i32 5
  %v6 = extractelement <16 x i32> %vload, i32 6
  %v7 = extractelement <16 x i32> %vload, i32 7
  %v8 = extractelement <16 x i32> %vload, i32 8
  %v9 = extractelement <16 x i32> %vload, i32 9
  %v10 = extractelement <16 x i32> %vload, i32 10
  %v11 = extractelement <16 x i32> %vload, i32 11
  %v12 = extractelement <16 x i32> %vload, i32 12
  %v13 = extractelement <16 x i32> %vload, i32 13
  %v14 = extractelement <16 x i32> %vload, i32 14
  %v15 = extractelement <16 x i32> %vload, i32 15
  %vreduce01 = and i32 %v0, %v1
  %vreduce23 = and i32 %v2, %v3
  %vreduce45 = and i32 %v4, %v5
  %vreduce67 = and i32 %v6, %v7
  %vreduce89 = and i32 %v8, %v9
  %vreduce1011 = and i32 %v10, %v11
  %vreduce1213 = and i32 %v12, %v13
  %vreduce1415 = and i32 %v14, %v15
  %vreduce0123 = and i32 %vreduce01, %vreduce23
  %vreduce4567 = and i32 %vreduce45, %vreduce67
  %vreduce891011 = and i32 %vreduce89, %vreduce1011
  %vreduce12131415 = and i32 %vreduce1213, %vreduce1415
  %vreduce01234567 = and i32 %vreduce0123, %vreduce4567
  %vreduce89101112131415 = and i32 %vreduce891011, %vreduce12131415
  %vreduce = and i32 %vreduce01234567, %vreduce89101112131415
  %vcheck = icmp eq i32 %vreduce, -1
  ret i1 %vcheck
}

;
; vXi16
;

; <2 x i16> = 32 bits total: single scalar cmpl against -1.
define i1 @test_v2i16(ptr %ptr) nounwind {
; SSE-LABEL: test_v2i16:
; SSE: # %bb.0:
; SSE-NEXT: cmpl $-1, (%rdi)
; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
; AVX-LABEL: test_v2i16:
; AVX: # %bb.0:
; AVX-NEXT: cmpl $-1, (%rdi)
; AVX-NEXT: sete %al
; AVX-NEXT: retq
  %vload = load <2 x i16>, ptr %ptr
  %v0 = extractelement <2 x i16> %vload, i32 0
  %v1 = extractelement <2 x i16> %vload, i32 1
  %vreduce = and i16 %v0, %v1
  %vcheck = icmp eq i16 %vreduce, -1
  ret i1 %vcheck
}

; <4 x i16> = 64 bits total: single scalar cmpq against -1.
define i1 @test_v4i16(ptr %ptr) nounwind {
; SSE-LABEL: test_v4i16:
; SSE: # %bb.0:
; SSE-NEXT: cmpq $-1, (%rdi)
; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
; AVX-LABEL: test_v4i16:
; AVX: # %bb.0:
; AVX-NEXT: cmpq $-1, (%rdi)
; AVX-NEXT: sete %al
; AVX-NEXT: retq
  %vload = load <4 x i16>, ptr %ptr
  %v0 = extractelement <4 x i16> %vload, i32 0
  %v1 = extractelement <4 x i16> %vload, i32 1
  %v2 = extractelement <4 x i16> %vload, i32 2
  %v3 = extractelement <4 x i16> %vload, i32 3
  %vreduce01 = and i16 %v0, %v1
  %vreduce23 = and i16 %v2, %v3
  %vreduce = and i16 %vreduce01, %vreduce23
  %vcheck = icmp eq i16 %vreduce, -1
  ret i1 %vcheck
}

; <8 x i16>: 128-bit all-ones test; plain SSE2 uses byte-granular
; pcmpeqb/pmovmskb with a 16-bit mask compare.
define i1 @test_v8i16(ptr %ptr) nounwind {
; SSE2-LABEL: test_v8i16:
; SSE2: # %bb.0:
; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
; SSE2-NEXT: pcmpeqb (%rdi), %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %eax
; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT: sete %al
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v8i16:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa (%rdi), %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: ptest %xmm1, %xmm0
; SSE41-NEXT: setb %al
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v8i16:
; AVX: # %bb.0:
; AVX-NEXT: vmovdqa (%rdi), %xmm0
; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT: vptest %xmm1, %xmm0
; AVX-NEXT: setb %al
; AVX-NEXT: retq
  %vload = load <8 x i16>, ptr %ptr
  %v0 = extractelement <8 x i16> %vload, i32 0
  %v1 = extractelement <8 x i16> %vload, i32 1
  %v2 = extractelement <8 x i16> %vload, i32 2
  %v3 = extractelement <8 x i16> %vload, i32 3
  %v4 = extractelement <8 x i16> %vload, i32 4
  %v5 = extractelement <8 x i16> %vload, i32 5
  %v6 = extractelement <8 x i16> %vload, i32 6
  %v7 = extractelement <8 x i16> %vload, i32 7
  %vreduce01 = and i16 %v0, %v1
  %vreduce23 = and i16 %v2, %v3
  %vreduce45 = and i16 %v4, %v5
  %vreduce67 = and i16 %v6, %v7
  %vreduce0123 = and i16 %vreduce01, %vreduce23
  %vreduce4567 = and i16 %vreduce45, %vreduce67
  %vreduce = and i16 %vreduce0123, %vreduce4567
  %vcheck = icmp eq i16 %vreduce, -1
  ret i1 %vcheck
}

; <16 x i16>: 256 bits; halves ANDed before the all-ones test.
define i1 @test_v16i16(ptr %ptr) nounwind {
; SSE2-LABEL: test_v16i16:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa (%rdi), %xmm0
; SSE2-NEXT: pand 16(%rdi), %xmm0
; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; SSE2-NEXT: pmovmskb %xmm1, %eax
; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT: sete %al
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v16i16:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa (%rdi), %xmm0
; SSE41-NEXT: pand 16(%rdi), %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: ptest %xmm1, %xmm0
; SSE41-NEXT: setb %al
; SSE41-NEXT: retq
;
; AVX1-LABEL: test_v16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovdqa (%rdi), %ymm0
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT: vptest %ymm1, %ymm0
; AVX1-NEXT: setb %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa (%rdi), %ymm0
; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vptest %ymm1, %ymm0
; AVX2-NEXT: setb %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_v16i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovdqa (%rdi), %ymm0
; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512-NEXT: vptest %ymm1, %ymm0
; AVX512-NEXT: setb %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %vload = load <16 x i16>, ptr %ptr
  %v0 = extractelement <16 x i16> %vload, i32 0
  %v1 = extractelement <16 x i16> %vload, i32 1
  %v2 = extractelement <16 x i16> %vload, i32 2
  %v3 = extractelement <16 x i16> %vload, i32 3
  %v4 = extractelement <16 x i16> %vload, i32 4
  %v5 = extractelement <16 x i16> %vload, i32 5
  %v6 = extractelement <16 x i16> %vload, i32 6
  %v7 = extractelement <16 x i16> %vload, i32 7
  %v8 = extractelement <16 x i16> %vload, i32 8
  %v9 = extractelement <16 x i16> %vload, i32 9
  %v10 = extractelement <16 x i16> %vload, i32 10
  %v11 = extractelement <16 x i16> %vload, i32 11
  %v12 = extractelement <16 x i16> %vload, i32 12
  %v13 = extractelement <16 x i16> %vload, i32 13
  %v14 = extractelement <16 x i16> %vload, i32 14
  %v15 = extractelement <16 x i16> %vload, i32 15
  %vreduce01 = and i16 %v0, %v1
  %vreduce23 = and i16 %v2, %v3
  %vreduce45 = and i16 %v4, %v5
  %vreduce67 = and i16 %v6, %v7
  %vreduce89 = and i16 %v8, %v9
  %vreduce1011 = and i16 %v10, %v11
  %vreduce1213 = and i16 %v12, %v13
  %vreduce1415 = and i16 %v14, %v15
  %vreduce0123 = and i16 %vreduce01, %vreduce23
  %vreduce4567 = and i16 %vreduce45, %vreduce67
  %vreduce891011 = and i16 %vreduce89, %vreduce1011
  %vreduce12131415 = and i16 %vreduce1213, %vreduce1415
  %vreduce01234567 = and i16 %vreduce0123, %vreduce4567
  %vreduce89101112131415 = and i16 %vreduce891011, %vreduce12131415
  %vreduce = and i16 %vreduce01234567, %vreduce89101112131415
  %vcheck = icmp eq i16 %vreduce, -1
  ret i1 %vcheck
}

;
; vXi8
;

; <2 x i8> = 16 bits total: single scalar cmpw against -1.
define i1 @test_v2i8(ptr %ptr) nounwind {
; SSE-LABEL: test_v2i8:
; SSE: # %bb.0:
; SSE-NEXT: cmpw $-1, (%rdi)
; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
; AVX-LABEL: test_v2i8:
; AVX: # %bb.0:
; AVX-NEXT: cmpw $-1, (%rdi)
; AVX-NEXT: sete %al
; AVX-NEXT: retq
  %vload = load <2 x i8>, ptr %ptr
  %v0 = extractelement <2 x i8> %vload, i32 0
  %v1 = extractelement <2 x i8> %vload, i32 1
  %vreduce = and i8 %v0, %v1
  %vcheck = icmp eq i8 %vreduce, -1
  ret i1 %vcheck
}

; <4 x i8> = 32 bits total: single scalar cmpl against -1.
define i1 @test_v4i8(ptr %ptr) nounwind {
; SSE-LABEL: test_v4i8:
; SSE: # %bb.0:
; SSE-NEXT: cmpl $-1, (%rdi)
; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
; AVX-LABEL: test_v4i8:
; AVX: # %bb.0:
; AVX-NEXT: cmpl $-1, (%rdi)
; AVX-NEXT: sete %al
; AVX-NEXT: retq
  %vload = load <4 x i8>, ptr %ptr
  %v0 = extractelement <4 x i8> %vload, i32 0
  %v1 = extractelement <4 x i8> %vload, i32 1
  %v2 = extractelement <4 x i8> %vload, i32 2
  %v3 = extractelement <4 x i8> %vload, i32 3
  %vreduce01 = and i8 %v0, %v1
  %vreduce23 = and i8 %v2, %v3
  %vreduce = and i8 %vreduce01, %vreduce23
  %vcheck = icmp eq i8 %vreduce, -1
  ret i1 %vcheck
}

; <8 x i8> = 64 bits total: single scalar cmpq against -1.
define i1 @test_v8i8(ptr %ptr) nounwind {
; SSE-LABEL: test_v8i8:
; SSE: # %bb.0:
; SSE-NEXT: cmpq $-1, (%rdi)
; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
; AVX-LABEL: test_v8i8:
; AVX: # %bb.0:
; AVX-NEXT: cmpq $-1, (%rdi)
; AVX-NEXT: sete %al
; AVX-NEXT: retq
  %vload = load <8 x i8>, ptr %ptr
  %v0 = extractelement <8 x i8> %vload, i32 0
  %v1 = extractelement <8 x i8> %vload, i32 1
  %v2 = extractelement <8 x i8> %vload, i32 2
  %v3 = extractelement <8 x i8> %vload, i32 3
  %v4 = extractelement <8 x i8> %vload, i32 4
  %v5 = extractelement <8 x i8> %vload, i32 5
  %v6 = extractelement <8 x i8> %vload, i32 6
  %v7 = extractelement <8 x i8> %vload, i32 7
  %vreduce01 = and i8 %v0, %v1
  %vreduce23 = and i8 %v2, %v3
  %vreduce45 = and i8 %v4, %v5
  %vreduce67 = and i8 %v6, %v7
  %vreduce0123 = and i8 %vreduce01, %vreduce23
  %vreduce4567 = and i8 %vreduce45, %vreduce67
  %vreduce = and i8 %vreduce0123, %vreduce4567
  %vcheck = icmp eq i8 %vreduce, -1
  ret i1 %vcheck
}

; <16 x i8>: single 128-bit all-ones test (byte compare + mask on SSE2).
define i1 @test_v16i8(ptr %ptr) nounwind {
; SSE2-LABEL: test_v16i8:
; SSE2: # %bb.0:
; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
; SSE2-NEXT: pcmpeqb (%rdi), %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %eax
; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT: sete %al
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v16i8:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa (%rdi), %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: ptest %xmm1, %xmm0
; SSE41-NEXT: setb %al
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v16i8:
; AVX: # %bb.0:
; AVX-NEXT: vmovdqa (%rdi), %xmm0
; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT: vptest %xmm1, %xmm0
; AVX-NEXT: setb %al
; AVX-NEXT: retq
  %vload = load <16 x i8>, ptr %ptr
  %v0 = extractelement <16 x i8> %vload, i32 0
  %v1 = extractelement <16 x i8> %vload, i32 1
  %v2 = extractelement <16 x i8> %vload, i32 2
  %v3 = extractelement <16 x i8> %vload, i32 3
  %v4 = extractelement <16 x i8> %vload, i32 4
  %v5 = extractelement <16 x i8> %vload, i32 5
  %v6 = extractelement <16 x i8> %vload, i32 6
  %v7 = extractelement <16 x i8> %vload, i32 7
  %v8 = extractelement <16 x i8> %vload, i32 8
  %v9 = extractelement <16 x i8> %vload, i32 9
  %v10 = extractelement <16 x i8> %vload, i32 10
  %v11 = extractelement <16 x i8> %vload, i32 11
  %v12 = extractelement <16 x i8> %vload, i32 12
  %v13 = extractelement <16 x i8> %vload, i32 13
  %v14 = extractelement <16 x i8> %vload, i32 14
  %v15 = extractelement <16 x i8> %vload, i32 15
  %vreduce01 = and i8 %v0, %v1
  %vreduce23 = and i8 %v2, %v3
  %vreduce45 = and i8 %v4, %v5
  %vreduce67 = and i8 %v6, %v7
  %vreduce89 = and i8 %v8, %v9
  %vreduce1011 = and i8 %v10, %v11
  %vreduce1213 = and i8 %v12, %v13
  %vreduce1415 = and i8 %v14, %v15
  %vreduce0123 = and i8 %vreduce01, %vreduce23
  %vreduce4567 = and i8 %vreduce45, %vreduce67
  %vreduce891011 = and i8 %vreduce89, %vreduce1011
  %vreduce12131415 = and i8 %vreduce1213, %vreduce1415
  %vreduce01234567 = and i8 %vreduce0123, %vreduce4567
  %vreduce89101112131415 = and i8 %vreduce891011, %vreduce12131415
  %vreduce = and i8 %vreduce01234567, %vreduce89101112131415
  %vcheck = icmp eq i8 %vreduce, -1
  ret i1 %vcheck
}

;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; AVX1OR2: {{.*}}
; AVX512BW: {{.*}}
; AVX512F: {{.*}}
; AVX512VL: {{.*}}