; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s

; Each test below builds a wide integer by zero-extending two narrower values,
; shifting one (or both) of them, and OR-ing the pieces together. The combined
; value is then compared for (in)equality against 0 or -1.

; Concatenate two i8 values into an i16 (%x in bits 8-15, %y in bits 0-7) and
; test whether the result is all-ones. The expected assembly ANDs the two
; inputs and compares only the low byte against -1.
define i1 @cmp_allbits_concat_i8(i8 %x, i8 %y) {
; CHECK-LABEL: cmp_allbits_concat_i8:
; CHECK: # %bb.0:
; CHECK-NEXT: andl %esi, %edi
; CHECK-NEXT: cmpb $-1, %dil
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
  %zx = zext i8 %x to i16
  %zy = zext i8 %y to i16
  %sh = shl i16 %zx, 8
  %or = or i16 %zy, %sh
  %r = icmp eq i16 %or, -1
  ret i1 %r
}

; Concatenate two i32 values into an i64 (%x in bits 32-63, %y in bits 0-31)
; and test whether any bit is set (ne 0). The expected assembly is a single
; 32-bit OR of the inputs followed by setne.
define i1 @cmp_anybits_concat_i32(i32 %x, i32 %y) {
; CHECK-LABEL: cmp_anybits_concat_i32:
; CHECK: # %bb.0:
; CHECK-NEXT: orl %esi, %edi
; CHECK-NEXT: setne %al
; CHECK-NEXT: retq
  %zx = zext i32 %x to i64
  %zy = zext i32 %y to i64
  %sh = shl i64 %zx, 32
  %or = or i64 %zy, %sh
  %r = icmp ne i64 %or, 0
  ret i1 %r
}

; Both i16 operands are shifted before being OR'd: %x lands in bits 32-47 and
; %y in bits 8-23 of the i64, so the two pieces occupy disjoint, non-adjacent
; bit ranges. The result is compared for equality with 0 (note: eq, not ne).
; The expected assembly keeps only the shift by 8 and uses a 32-bit OR.
define i1 @cmp_anybits_concat_shl_shl_i16(i16 %x, i16 %y) {
; CHECK-LABEL: cmp_anybits_concat_shl_shl_i16:
; CHECK: # %bb.0:
; CHECK-NEXT: movzwl %di, %eax
; CHECK-NEXT: movzwl %si, %ecx
; CHECK-NEXT: shll $8, %ecx
; CHECK-NEXT: orl %eax, %ecx
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
  %zx = zext i16 %x to i64
  %zy = zext i16 %y to i64
  %sx = shl i64 %zx, 32
  %sy = shl i64 %zy, 8
  %or = or i64 %sx, %sy
  %r = icmp eq i64 %or, 0
  ret i1 %r
}

; Same as cmp_anybits_concat_shl_shl_i16 but with the operands of the 'or'
; swapped (%sy first). The expected assembly is the same as for the
; non-commuted variant.
define i1 @cmp_anybits_concat_shl_shl_i16_commute(i16 %x, i16 %y) {
; CHECK-LABEL: cmp_anybits_concat_shl_shl_i16_commute:
; CHECK: # %bb.0:
; CHECK-NEXT: movzwl %di, %eax
; CHECK-NEXT: movzwl %si, %ecx
; CHECK-NEXT: shll $8, %ecx
; CHECK-NEXT: orl %eax, %ecx
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
  %zx = zext i16 %x to i64
  %zy = zext i16 %y to i64
  %sx = shl i64 %zx, 32
  %sy = shl i64 %zy, 8
  %or = or i64 %sy, %sx
  %r = icmp eq i64 %or, 0
  ret i1 %r
}

; FIXME: Add vector support, but it's only worth it if we can freely truncate the
; concat'd vectors.

; Vector form of the all-ones test: each pair of i8 elements is combined into
; an i16 (%x element in bits 8-15, %y element in bits 0-7), compared for
; equality with -1, and the i1 result is sign-extended to an i8 mask element.
; The expected assembly interleaves the bytes of the two inputs with
; punpcklbw/punpckhbw, compares 16-bit lanes with pcmpeqw, and packs the two
; halves back together with packsswb.
define <16 x i8> @cmp_allbits_concat_v16i8(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: cmp_allbits_concat_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: movdqa %xmm1, %xmm2
; CHECK-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; CHECK-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
; CHECK-NEXT: pcmpeqw %xmm0, %xmm1
; CHECK-NEXT: pcmpeqw %xmm0, %xmm2
; CHECK-NEXT: packsswb %xmm1, %xmm2
; CHECK-NEXT: movdqa %xmm2, %xmm0
; CHECK-NEXT: retq
  %zx = zext <16 x i8> %x to <16 x i16>
  %zy = zext <16 x i8> %y to <16 x i16>
  %sh = shl <16 x i16> %zx, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %or = or <16 x i16> %zy, %sh
  %r = icmp eq <16 x i16> %or, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %s = sext <16 x i1> %r to <16 x i8>
  ret <16 x i8> %s
}

; Vector form of the all-zero test: each pair of i64 elements is combined into
; an i128 (%x element in bits 64-127, %y element in bits 0-63), compared for
; equality with zero, and the i1 result is sign-extended to an i64 mask
; element. The expected assembly moves every element to a general-purpose
; register, ORs the matching %x/%y elements, turns each sete result into a
; 0/-1 mask with negq, and rebuilds the vector with punpcklqdq.
define <2 x i64> @cmp_nobits_concat_v2i64(<2 x i64> %x, <2 x i64> %y) {
; CHECK-LABEL: cmp_nobits_concat_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %xmm0, %rax
; CHECK-NEXT: pextrq $1, %xmm0, %rcx
; CHECK-NEXT: movq %xmm1, %rdx
; CHECK-NEXT: pextrq $1, %xmm1, %rsi
; CHECK-NEXT: xorl %edi, %edi
; CHECK-NEXT: orq %rcx, %rsi
; CHECK-NEXT: sete %dil
; CHECK-NEXT: negq %rdi
; CHECK-NEXT: movq %rdi, %xmm1
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: orq %rax, %rdx
; CHECK-NEXT: sete %cl
; CHECK-NEXT: negq %rcx
; CHECK-NEXT: movq %rcx, %xmm0
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: retq
  %zx = zext <2 x i64> %x to <2 x i128>
  %zy = zext <2 x i64> %y to <2 x i128>
  %sh = shl <2 x i128> %zx, <i128 64, i128 64>
  %or = or <2 x i128> %zy, %sh
  %r = icmp eq <2 x i128> %or, zeroinitializer
  %s = sext <2 x i1> %r to <2 x i64>
  ret <2 x i64> %s
}
