; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt -S -passes=instcombine < %s | FileCheck %s

; Tests that InstCombine constant-folds comparisons fed by x86 horizontal
; add/sub intrinsics (phadd/phsub, SSSE3 128-bit and AVX2 256-bit variants)
; when the intrinsic operands are masked to known bit ranges.
;
; Pattern: inputs masked to [0,3] make each phadd lane sum lie in [0,6], so
; and-ing the result with -8 always yields 0; an eq-compare against nonzero
; constants folds to false, and against 0 folds to true (see CHECK lines).

define <4 x i1> @hadd_and_eq_v4i32(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: define <4 x i1> @hadd_and_eq_v4i32(
; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <4 x i1> zeroinitializer
;
entry:
  %and1 = and <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
  %and2 = and <4 x i32> %y, <i32 3, i32 3, i32 3, i32 3>
  %hadd = tail call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %and1, <4 x i32> %and2)
  %andr = and <4 x i32> %hadd, <i32 -8, i32 -8, i32 -8, i32 -8>
  %ret = icmp eq <4 x i32> %andr, <i32 3, i32 4, i32 5, i32 6>
  ret <4 x i1> %ret
}

define <8 x i1> @hadd_and_eq_v8i16(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: define <8 x i1> @hadd_and_eq_v8i16(
; CHECK-SAME: <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true>
;
entry:
  %and1 = and <8 x i16> %x, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %and2 = and <8 x i16> %y, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %hadd = tail call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %and1, <8 x i16> %and2)
  %andr = and <8 x i16> %hadd, <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>
  %ret = icmp eq <8 x i16> %andr, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 0>
  ret <8 x i1> %ret
}

define <8 x i1> @hadd_and_eq_v8i16_sat(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: define <8 x i1> @hadd_and_eq_v8i16_sat(
; CHECK-SAME: <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true>
;
entry:
  %and1 = and <8 x i16> %x, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %and2 = and <8 x i16> %y, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %hadd = tail call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %and1, <8 x i16> %and2)
  %andr = and <8 x i16> %hadd, <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>
  %ret = icmp eq <8 x i16> %andr, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 0>
  ret <8 x i1> %ret
}

define <8 x i1> @hadd_and_eq_v8i32(<8 x i32> %x, <8 x i32> %y) {
; CHECK-LABEL: define <8 x i1> @hadd_and_eq_v8i32(
; CHECK-SAME: <8 x i32> [[X:%.*]], <8 x i32> [[Y:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <8 x i1> zeroinitializer
;
entry:
  %and1 = and <8 x i32> %x, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %and2 = and <8 x i32> %y, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %hadd = tail call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %and1, <8 x i32> %and2)
  %andr = and <8 x i32> %hadd, <i32 -8, i32 -8, i32 -8, i32 -8, i32 -8, i32 -8, i32 -8, i32 -8>
  %ret = icmp eq <8 x i32> %andr, <i32 3, i32 4, i32 5, i32 6, i32 3, i32 4, i32 5, i32 6>
  ret <8 x i1> %ret
}

define <16 x i1> @hadd_and_eq_v16i16(<16 x i16> %x, <16 x i16> %y) {
; CHECK-LABEL: define <16 x i1> @hadd_and_eq_v16i16(
; CHECK-SAME: <16 x i16> [[X:%.*]], <16 x i16> [[Y:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true>
;
entry:
  %and1 = and <16 x i16> %x, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %and2 = and <16 x i16> %y, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %hadd = tail call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %and1, <16 x i16> %and2)
  %andr = and <16 x i16> %hadd, <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>
  %ret = icmp eq <16 x i16> %andr, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 0, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 0>
  ret <16 x i1> %ret
}

define <16 x i1> @hadd_and_eq_v16i16_sat(<16 x i16> %x, <16 x i16> %y) {
; CHECK-LABEL: define <16 x i1> @hadd_and_eq_v16i16_sat(
; CHECK-SAME: <16 x i16> [[X:%.*]], <16 x i16> [[Y:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true>
;
entry:
  %and1 = and <16 x i16> %x, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %and2 = and <16 x i16> %y, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %hadd = tail call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %and1, <16 x i16> %and2)
  %andr = and <16 x i16> %hadd, <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>
  %ret = icmp eq <16 x i16> %andr, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 0, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 0>
  ret <16 x i1> %ret
}

; phsub tests: operands with low bits forced to all-ones make each lane
; difference have known low bits, so the trunc + eq-compare folds (CHECK lines).

define <4 x i1> @hsub_trunc_eq_v4i32(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: define <4 x i1> @hsub_trunc_eq_v4i32(
; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <4 x i1> zeroinitializer
;
entry:
  %or1 = or <4 x i32> %x, <i32 65535, i32 65535, i32 65535, i32 65535>
  %or2 = or <4 x i32> %y, <i32 65535, i32 65535, i32 65535, i32 65535>
  %hsub = tail call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %or1, <4 x i32> %or2)
  %conv = trunc <4 x i32> %hsub to <4 x i16>
  %ret = icmp eq <4 x i16> %conv, <i16 3, i16 4, i16 5, i16 6>
  ret <4 x i1> %ret
}

define <8 x i1> @hsub_trunc_eq_v8i16(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: define <8 x i1> @hsub_trunc_eq_v8i16(
; CHECK-SAME: <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true>
;
entry:
  %or1 = or <8 x i16> %x, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %or2 = or <8 x i16> %y, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %hsub = tail call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %or1, <8 x i16> %or2)
  %conv = trunc <8 x i16> %hsub to <8 x i8>
  %ret = icmp eq <8 x i8> %conv, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 0>
  ret <8 x i1> %ret
}

define <8 x i1> @hsub_and_eq_v8i16_sat(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: define <8 x i1> @hsub_and_eq_v8i16_sat(
; CHECK-SAME: <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <8 x i1> splat (i1 true)
;
entry:
  %or1 = or <8 x i16> %x, <i16 3, i16 0, i16 3, i16 0, i16 3, i16 0, i16 3, i16 0>
  %or2 = or <8 x i16> %y, <i16 3, i16 0, i16 3, i16 0, i16 3, i16 0, i16 3, i16 0>
  %and1 = and <8 x i16> %or1, <i16 7, i16 3, i16 7, i16 3, i16 7, i16 3, i16 7, i16 3>
  %and2 = and <8 x i16> %or2, <i16 7, i16 3, i16 7, i16 3, i16 7, i16 3, i16 7, i16 3>
  %hsub = tail call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %and1, <8 x i16> %and2)
  %ret = icmp sle <8 x i16> %hsub, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
  ret <8 x i1> %ret
}

define <8 x i1> @hsub_trunc_eq_v8i32(<8 x i32> %x, <8 x i32> %y) {
; CHECK-LABEL: define <8 x i1> @hsub_trunc_eq_v8i32(
; CHECK-SAME: <8 x i32> [[X:%.*]], <8 x i32> [[Y:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <8 x i1> zeroinitializer
;
entry:
  %or1 = or <8 x i32> %x, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %or2 = or <8 x i32> %y, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %hsub = tail call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %or1, <8 x i32> %or2)
  %conv = trunc <8 x i32> %hsub to <8 x i16>
  %ret = icmp eq <8 x i16> %conv, <i16 3, i16 4, i16 5, i16 6, i16 3, i16 4, i16 5, i16 6>
  ret <8 x i1> %ret
}

define <16 x i1> @hsub_trunc_eq_v16i16(<16 x i16> %x, <16 x i16> %y) {
; CHECK-LABEL: define <16 x i1> @hsub_trunc_eq_v16i16(
; CHECK-SAME: <16 x i16> [[X:%.*]], <16 x i16> [[Y:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true>
;
entry:
  %or1 = or <16 x i16> %x, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %or2 = or <16 x i16> %y, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %hsub = tail call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %or1, <16 x i16> %or2)
  %conv = trunc <16 x i16> %hsub to <16 x i8>
  %ret = icmp eq <16 x i8> %conv, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 0>
  ret <16 x i1> %ret
}

define <16 x i1> @hsub_and_eq_v16i16_sat(<16 x i16> %x, <16 x i16> %y) {
; CHECK-LABEL: define <16 x i1> @hsub_and_eq_v16i16_sat(
; CHECK-SAME: <16 x i16> [[X:%.*]], <16 x i16> [[Y:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <16 x i1> splat (i1 true)
;
entry:
  %or1 = or <16 x i16> %x, <i16 3, i16 0, i16 3, i16 0, i16 3, i16 0, i16 3, i16 0, i16 3, i16 0, i16 3, i16 0, i16 3, i16 0, i16 3, i16 0>
  %or2 = or <16 x i16> %y, <i16 3, i16 0, i16 3, i16 0, i16 3, i16 0, i16 3, i16 0, i16 3, i16 0, i16 3, i16 0, i16 3, i16 0, i16 3, i16 0>
  %and1 = and <16 x i16> %or1, <i16 7, i16 3, i16 7, i16 3, i16 7, i16 3, i16 7, i16 3, i16 7, i16 3, i16 7, i16 3, i16 7, i16 3, i16 7, i16 3>
  %and2 = and <16 x i16> %or2, <i16 7, i16 3, i16 7, i16 3, i16 7, i16 3, i16 7, i16 3, i16 7, i16 3, i16 7, i16 3, i16 7, i16 3, i16 7, i16 3>
  %hsub = tail call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %and1, <16 x i16> %and2)
  %ret = icmp sle <16 x i16> %hsub, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
  ret <16 x i1> %ret
}

; Shuffle tests: the fold only needs the phadd lanes selected by the shuffle
; mask to have known values; the negative variants pick a lane whose operands
; are not fully masked, so the compare does not fold (see CHECK lines).

define <4 x i1> @hadd_shuffle_2st_v4i32(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: define <4 x i1> @hadd_shuffle_2st_v4i32(
; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <4 x i1> splat (i1 true)
;
entry:
  %and1 = and <4 x i32> %x, <i32 -1, i32 -1, i32 3, i32 3>
  %and2 = and <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %hadd = tail call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %and1, <4 x i32> %and2)
  %shuf = shufflevector <4 x i32> %hadd, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 1, i32 5, i32 6>
  %ret = icmp ne <4 x i32> %shuf, <i32 8, i32 8, i32 8, i32 8>
  ret <4 x i1> %ret
}

define <4 x i1> @hadd_shuffle_4th_v4i32(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: define <4 x i1> @hadd_shuffle_4th_v4i32(
; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <4 x i1> splat (i1 true)
;
entry:
  %and1 = and <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  %and2 = and <4 x i32> %y, <i32 -1, i32 -1, i32 3, i32 3>
  %hadd = tail call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %and1, <4 x i32> %and2)
  %shuf = shufflevector <4 x i32> %hadd, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
  %ret = icmp ne <4 x i32> %shuf, <i32 8, i32 8, i32 8, i32 8>
  ret <4 x i1> %ret
}

define <4 x i1> @hadd_shuffle_2st_negative_v4i32(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: define <4 x i1> @hadd_shuffle_2st_negative_v4i32(
; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = and <4 x i32> [[X]], <i32 3, i32 3, i32 -1, i32 -1>
; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[Y]], splat (i32 3)
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 0, i32 0, i32 poison>, <4 x i32> <i32 4, i32 1, i32 5, i32 6>
; CHECK-NEXT:    [[RET:%.*]] = icmp ne <4 x i32> [[TMP3]], splat (i32 8)
; CHECK-NEXT:    ret <4 x i1> [[RET]]
;
entry:
  %and1 = and <4 x i32> %x, <i32 3, i32 3, i32 -1, i32 -1>
  %and2 = and <4 x i32> %y, <i32 3, i32 3, i32 3, i32 3>
  %hadd = tail call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %and1, <4 x i32> %and2)
  %shuf = shufflevector <4 x i32> %hadd, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 1, i32 5, i32 6>
  %ret = icmp ne <4 x i32> %shuf, <i32 8, i32 8, i32 8, i32 8>
  ret <4 x i1> %ret
}

define <4 x i1> @hadd_shuffle_4th_negative_v4i32(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: define <4 x i1> @hadd_shuffle_4th_negative_v4i32(
; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = and <4 x i32> [[X]], splat (i32 3)
; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[Y]], <i32 3, i32 3, i32 -1, i32 -1>
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> <i32 0, i32 0, i32 0, i32 poison>, <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
; CHECK-NEXT:    [[RET:%.*]] = icmp ne <4 x i32> [[TMP3]], splat (i32 8)
; CHECK-NEXT:    ret <4 x i1> [[RET]]
;
entry:
  %and1 = and <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
  %and2 = and <4 x i32> %y, <i32 3, i32 3, i32 -1, i32 -1>
  %hadd = tail call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %and1, <4 x i32> %and2)
  %shuf = shufflevector <4 x i32> %hadd, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
  %ret = icmp ne <4 x i32> %shuf, <i32 8, i32 8, i32 8, i32 8>
  ret <4 x i1> %ret
}