; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt -S -passes=instcombine < %s | FileCheck %s

; Tests that InstCombine constant-folds comparisons fed by x86 horizontal
; add/sub intrinsics (phadd/phsub, SSSE3 128-bit and AVX2 256-bit variants)
; when the intrinsic operands are masked to known bit ranges.
;
; Pattern: inputs masked to [0,3] make each phadd lane sum lie in [0,6], so
; and-ing the result with -8 always yields 0; an eq-compare against nonzero
; constants folds to false, and against 0 folds to true (see CHECK lines).

define <4 x i1> @hadd_and_eq_v4i32(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: define <4 x i1> @hadd_and_eq_v4i32(
; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <4 x i1> zeroinitializer
;
entry:
  %and1 = and <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
  %and2 = and <4 x i32> %y, <i32 3, i32 3, i32 3, i32 3>
  %hadd = tail call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %and1, <4 x i32> %and2)
  %andr = and <4 x i32> %hadd, <i32 -8, i32 -8, i32 -8, i32 -8>
  %ret = icmp eq <4 x i32> %andr, <i32 3, i32 4, i32 5, i32 6>
  ret <4 x i1> %ret
}

define <8 x i1> @hadd_and_eq_v8i16(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: define <8 x i1> @hadd_and_eq_v8i16(
; CHECK-SAME: <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true>
;
entry:
  %and1 = and <8 x i16> %x, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %and2 = and <8 x i16> %y, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %hadd = tail call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %and1, <8 x i16> %and2)
  %andr = and <8 x i16> %hadd, <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>
  %ret = icmp eq <8 x i16> %andr, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 0>
  ret <8 x i1> %ret
}

define <8 x i1> @hadd_and_eq_v8i16_sat(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: define <8 x i1> @hadd_and_eq_v8i16_sat(
; CHECK-SAME: <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true>
;
entry:
  %and1 = and <8 x i16> %x, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %and2 = and <8 x i16> %y, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %hadd = tail call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %and1, <8 x i16> %and2)
  %andr = and <8 x i16> %hadd, <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>
  %ret = icmp eq <8 x i16> %andr, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 0>
  ret <8 x i1> %ret
}

define <8 x i1> @hadd_and_eq_v8i32(<8 x i32> %x, <8 x i32> %y) {
; CHECK-LABEL: define <8 x i1> @hadd_and_eq_v8i32(
; CHECK-SAME: <8 x i32> [[X:%.*]], <8 x i32> [[Y:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <8 x i1> zeroinitializer
;
entry:
  %and1 = and <8 x i32> %x, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %and2 = and <8 x i32> %y, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %hadd = tail call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %and1, <8 x i32> %and2)
  %andr = and <8 x i32> %hadd, <i32 -8, i32 -8, i32 -8, i32 -8, i32 -8, i32 -8, i32 -8, i32 -8>
  %ret = icmp eq <8 x i32> %andr, <i32 3, i32 4, i32 5, i32 6, i32 3, i32 4, i32 5, i32 6>
  ret <8 x i1> %ret
}

define <16 x i1> @hadd_and_eq_v16i16(<16 x i16> %x, <16 x i16> %y) {
; CHECK-LABEL: define <16 x i1> @hadd_and_eq_v16i16(
; CHECK-SAME: <16 x i16> [[X:%.*]], <16 x i16> [[Y:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true>
;
entry:
  %and1 = and <16 x i16> %x, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %and2 = and <16 x i16> %y, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %hadd = tail call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %and1, <16 x i16> %and2)
  %andr = and <16 x i16> %hadd, <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>
  %ret = icmp eq <16 x i16> %andr, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 0, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 0>
  ret <16 x i1> %ret
}

define <16 x i1> @hadd_and_eq_v16i16_sat(<16 x i16> %x, <16 x i16> %y) {
; CHECK-LABEL: define <16 x i1> @hadd_and_eq_v16i16_sat(
; CHECK-SAME: <16 x i16> [[X:%.*]], <16 x i16> [[Y:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true>
;
entry:
  %and1 = and <16 x i16> %x, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %and2 = and <16 x i16> %y, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %hadd = tail call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %and1, <16 x i16> %and2)
  %andr = and <16 x i16> %hadd, <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>
  %ret = icmp eq <16 x i16> %andr, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 0, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 0>
  ret <16 x i1> %ret
}

; phsub tests: operands with low bits forced to all-ones make each lane
; difference have known low bits, so the trunc + eq-compare folds (CHECK lines).

define <4 x i1> @hsub_trunc_eq_v4i32(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: define <4 x i1> @hsub_trunc_eq_v4i32(
; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <4 x i1> zeroinitializer
;
entry:
  %or1 = or <4 x i32> %x, <i32 65535, i32 65535, i32 65535, i32 65535>
  %or2 = or <4 x i32> %y, <i32 65535, i32 65535, i32 65535, i32 65535>
  %hsub = tail call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %or1, <4 x i32> %or2)
  %conv = trunc <4 x i32> %hsub to <4 x i16>
  %ret = icmp eq <4 x i16> %conv, <i16 3, i16 4, i16 5, i16 6>
  ret <4 x i1> %ret
}

define <8 x i1> @hsub_trunc_eq_v8i16(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: define <8 x i1> @hsub_trunc_eq_v8i16(
; CHECK-SAME: <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true>
;
entry:
  %or1 = or <8 x i16> %x, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %or2 = or <8 x i16> %y, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %hsub = tail call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %or1, <8 x i16> %or2)
  %conv = trunc <8 x i16> %hsub to <8 x i8>
  %ret = icmp eq <8 x i8> %conv, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 0>
  ret <8 x i1> %ret
}

define <8 x i1> @hsub_and_eq_v8i16_sat(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: define <8 x i1> @hsub_and_eq_v8i16_sat(
; CHECK-SAME: <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <8 x i1> splat (i1 true)
;
entry:
  %or1 = or <8 x i16> %x, <i16 3, i16 0, i16 3, i16 0, i16 3, i16 0, i16 3, i16 0>
  %or2 = or <8 x i16> %y, <i16 3, i16 0, i16 3, i16 0, i16 3, i16 0, i16 3, i16 0>
  %and1 = and <8 x i16> %or1, <i16 7, i16 3, i16 7, i16 3, i16 7, i16 3, i16 7, i16 3>
  %and2 = and <8 x i16> %or2, <i16 7, i16 3, i16 7, i16 3, i16 7, i16 3, i16 7, i16 3>
  %hsub = tail call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %and1, <8 x i16> %and2)
  %ret = icmp sle <8 x i16> %hsub, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
  ret <8 x i1> %ret
}

define <8 x i1> @hsub_trunc_eq_v8i32(<8 x i32> %x, <8 x i32> %y) {
; CHECK-LABEL: define <8 x i1> @hsub_trunc_eq_v8i32(
; CHECK-SAME: <8 x i32> [[X:%.*]], <8 x i32> [[Y:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <8 x i1> zeroinitializer
;
entry:
  %or1 = or <8 x i32> %x, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %or2 = or <8 x i32> %y, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %hsub = tail call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %or1, <8 x i32> %or2)
  %conv = trunc <8 x i32> %hsub to <8 x i16>
  %ret = icmp eq <8 x i16> %conv, <i16 3, i16 4, i16 5, i16 6, i16 3, i16 4, i16 5, i16 6>
  ret <8 x i1> %ret
}

define <16 x i1> @hsub_trunc_eq_v16i16(<16 x i16> %x, <16 x i16> %y) {
; CHECK-LABEL: define <16 x i1> @hsub_trunc_eq_v16i16(
; CHECK-SAME: <16 x i16> [[X:%.*]], <16 x i16> [[Y:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true>
;
entry:
  %or1 = or <16 x i16> %x, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %or2 = or <16 x i16> %y, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %hsub = tail call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %or1, <16 x i16> %or2)
  %conv = trunc <16 x i16> %hsub to <16 x i8>
  %ret = icmp eq <16 x i8> %conv, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 0>
  ret <16 x i1> %ret
}

define <16 x i1> @hsub_and_eq_v16i16_sat(<16 x i16> %x, <16 x i16> %y) {
; CHECK-LABEL: define <16 x i1> @hsub_and_eq_v16i16_sat(
; CHECK-SAME: <16 x i16> [[X:%.*]], <16 x i16> [[Y:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <16 x i1> splat (i1 true)
;
entry:
  %or1 = or <16 x i16> %x, <i16 3, i16 0, i16 3, i16 0, i16 3, i16 0, i16 3, i16 0, i16 3, i16 0, i16 3, i16 0, i16 3, i16 0, i16 3, i16 0>
  %or2 = or <16 x i16> %y, <i16 3, i16 0, i16 3, i16 0, i16 3, i16 0, i16 3, i16 0, i16 3, i16 0, i16 3, i16 0, i16 3, i16 0, i16 3, i16 0>
  %and1 = and <16 x i16> %or1, <i16 7, i16 3, i16 7, i16 3, i16 7, i16 3, i16 7, i16 3, i16 7, i16 3, i16 7, i16 3, i16 7, i16 3, i16 7, i16 3>
  %and2 = and <16 x i16> %or2, <i16 7, i16 3, i16 7, i16 3, i16 7, i16 3, i16 7, i16 3, i16 7, i16 3, i16 7, i16 3, i16 7, i16 3, i16 7, i16 3>
  %hsub = tail call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %and1, <16 x i16> %and2)
  %ret = icmp sle <16 x i16> %hsub, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
  ret <16 x i1> %ret
}

; Shuffle tests: the fold only needs the phadd lanes selected by the shuffle
; mask to have known values; the negative variants pick a lane whose operands
; are not fully masked, so the compare does not fold (see CHECK lines).

define <4 x i1> @hadd_shuffle_2st_v4i32(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: define <4 x i1> @hadd_shuffle_2st_v4i32(
; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <4 x i1> splat (i1 true)
;
entry:
  %and1 = and <4 x i32> %x, <i32 -1, i32 -1, i32 3, i32 3>
  %and2 = and <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %hadd = tail call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %and1, <4 x i32> %and2)
  %shuf = shufflevector <4 x i32> %hadd, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 1, i32 5, i32 6>
  %ret = icmp ne <4 x i32> %shuf, <i32 8, i32 8, i32 8, i32 8>
  ret <4 x i1> %ret
}

define <4 x i1> @hadd_shuffle_4th_v4i32(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: define <4 x i1> @hadd_shuffle_4th_v4i32(
; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <4 x i1> splat (i1 true)
;
entry:
  %and1 = and <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  %and2 = and <4 x i32> %y, <i32 -1, i32 -1, i32 3, i32 3>
  %hadd = tail call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %and1, <4 x i32> %and2)
  %shuf = shufflevector <4 x i32> %hadd, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
  %ret = icmp ne <4 x i32> %shuf, <i32 8, i32 8, i32 8, i32 8>
  ret <4 x i1> %ret
}

define <4 x i1> @hadd_shuffle_2st_negative_v4i32(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: define <4 x i1> @hadd_shuffle_2st_negative_v4i32(
; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = and <4 x i32> [[X]], <i32 3, i32 3, i32 -1, i32 -1>
; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[Y]], splat (i32 3)
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 0, i32 0, i32 poison>, <4 x i32> <i32 4, i32 1, i32 5, i32 6>
; CHECK-NEXT:    [[RET:%.*]] = icmp ne <4 x i32> [[TMP3]], splat (i32 8)
; CHECK-NEXT:    ret <4 x i1> [[RET]]
;
entry:
  %and1 = and <4 x i32> %x, <i32 3, i32 3, i32 -1, i32 -1>
  %and2 = and <4 x i32> %y, <i32 3, i32 3, i32 3, i32 3>
  %hadd = tail call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %and1, <4 x i32> %and2)
  %shuf = shufflevector <4 x i32> %hadd, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 1, i32 5, i32 6>
  %ret = icmp ne <4 x i32> %shuf, <i32 8, i32 8, i32 8, i32 8>
  ret <4 x i1> %ret
}

define <4 x i1> @hadd_shuffle_4th_negative_v4i32(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: define <4 x i1> @hadd_shuffle_4th_negative_v4i32(
; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = and <4 x i32> [[X]], splat (i32 3)
; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[Y]], <i32 3, i32 3, i32 -1, i32 -1>
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> <i32 0, i32 0, i32 0, i32 poison>, <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
; CHECK-NEXT:    [[RET:%.*]] = icmp ne <4 x i32> [[TMP3]], splat (i32 8)
; CHECK-NEXT:    ret <4 x i1> [[RET]]
;
entry:
  %and1 = and <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
  %and2 = and <4 x i32> %y, <i32 3, i32 3, i32 -1, i32 -1>
  %hadd = tail call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %and1, <4 x i32> %and2)
  %shuf = shufflevector <4 x i32> %hadd, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
  %ret = icmp ne <4 x i32> %shuf, <i32 8, i32 8, i32 8, i32 8>
  ret <4 x i1> %ret
}