; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s

; InstCombine tests for the x86 "multiply unsigned, keep high half" intrinsics:
;   llvm.x86.sse2.pmulhu.w       (<8 x i16>)
;   llvm.x86.avx2.pmulhu.w       (<16 x i16>)
;   llvm.x86.avx512.pmulhu.w.512 (<32 x i16>)
; Every scenario is repeated for all three vector widths, and the
; single-constant-operand scenarios are also repeated with the operands swapped.

;
; UNDEF Elts
;
; An undef operand on either side is expected to fold the whole call to zero.

define <8 x i16> @undef_pmulhu_128(<8 x i16> %a0) {
; CHECK-LABEL: @undef_pmulhu_128(
; CHECK-NEXT:    ret <8 x i16> zeroinitializer
;
  %1 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> undef)
  ret <8 x i16> %1
}

define <8 x i16> @undef_pmulhu_128_commute(<8 x i16> %a0) {
; CHECK-LABEL: @undef_pmulhu_128_commute(
; CHECK-NEXT:    ret <8 x i16> zeroinitializer
;
  %1 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> undef, <8 x i16> %a0)
  ret <8 x i16> %1
}

define <16 x i16> @undef_pmulhu_256(<16 x i16> %a0) {
; CHECK-LABEL: @undef_pmulhu_256(
; CHECK-NEXT:    ret <16 x i16> zeroinitializer
;
  %1 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %a0, <16 x i16> undef)
  ret <16 x i16> %1
}

define <16 x i16> @undef_pmulhu_256_commute(<16 x i16> %a0) {
; CHECK-LABEL: @undef_pmulhu_256_commute(
; CHECK-NEXT:    ret <16 x i16> zeroinitializer
;
  %1 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> undef, <16 x i16> %a0)
  ret <16 x i16> %1
}

define <32 x i16> @undef_pmulhu_512(<32 x i16> %a0) {
; CHECK-LABEL: @undef_pmulhu_512(
; CHECK-NEXT:    ret <32 x i16> zeroinitializer
;
  %1 = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> %a0, <32 x i16> undef)
  ret <32 x i16> %1
}

define <32 x i16> @undef_pmulhu_512_commute(<32 x i16> %a0) {
; CHECK-LABEL: @undef_pmulhu_512_commute(
; CHECK-NEXT:    ret <32 x i16> zeroinitializer
;
  %1 = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> undef, <32 x i16> %a0)
  ret <32 x i16> %1
}

;
; Zero Elts
;
; An all-zero operand on either side is expected to fold the call to zero.

define <8 x i16> @zero_pmulhu_128(<8 x i16> %a0) {
; CHECK-LABEL: @zero_pmulhu_128(
; CHECK-NEXT:    ret <8 x i16> zeroinitializer
;
  %1 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> zeroinitializer)
  ret <8 x i16> %1
}

define <8 x i16> @zero_pmulhu_128_commute(<8 x i16> %a0) {
; CHECK-LABEL: @zero_pmulhu_128_commute(
; CHECK-NEXT:    ret <8 x i16> zeroinitializer
;
  %1 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> zeroinitializer, <8 x i16> %a0)
  ret <8 x i16> %1
}

define <16 x i16> @zero_pmulhu_256(<16 x i16> %a0) {
; CHECK-LABEL: @zero_pmulhu_256(
; CHECK-NEXT:    ret <16 x i16> zeroinitializer
;
  %1 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %a0, <16 x i16> zeroinitializer)
  ret <16 x i16> %1
}

define <16 x i16> @zero_pmulhu_256_commute(<16 x i16> %a0) {
; CHECK-LABEL: @zero_pmulhu_256_commute(
; CHECK-NEXT:    ret <16 x i16> zeroinitializer
;
  %1 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> zeroinitializer, <16 x i16> %a0)
  ret <16 x i16> %1
}

define <32 x i16> @zero_pmulhu_512(<32 x i16> %a0) {
; CHECK-LABEL: @zero_pmulhu_512(
; CHECK-NEXT:    ret <32 x i16> zeroinitializer
;
  %1 = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> %a0, <32 x i16> zeroinitializer)
  ret <32 x i16> %1
}

define <32 x i16> @zero_pmulhu_512_commute(<32 x i16> %a0) {
; CHECK-LABEL: @zero_pmulhu_512_commute(
; CHECK-NEXT:    ret <32 x i16> zeroinitializer
;
  %1 = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> zeroinitializer, <32 x i16> %a0)
  ret <32 x i16> %1
}

;
; Multiply by One
;
; Each result lane is the upper 16 bits of the unsigned 32-bit product (the
; constant-folding expectations further down pin this), so x * 1 never reaches
; the upper half and the call is expected to fold to zero.

define <8 x i16> @one_pmulhu_128(<8 x i16> %a0) {
; CHECK-LABEL: @one_pmulhu_128(
; CHECK-NEXT:    ret <8 x i16> zeroinitializer
;
  %1 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %1
}

define <8 x i16> @one_pmulhu_128_commute(<8 x i16> %a0) {
; CHECK-LABEL: @one_pmulhu_128_commute(
; CHECK-NEXT:    ret <8 x i16> zeroinitializer
;
  %1 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %a0)
  ret <8 x i16> %1
}

define <16 x i16> @one_pmulhu_256(<16 x i16> %a0) {
; CHECK-LABEL: @one_pmulhu_256(
; CHECK-NEXT:    ret <16 x i16> zeroinitializer
;
  %1 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %a0, <16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <16 x i16> %1
}

define <16 x i16> @one_pmulhu_256_commute(<16 x i16> %a0) {
; CHECK-LABEL: @one_pmulhu_256_commute(
; CHECK-NEXT:    ret <16 x i16> zeroinitializer
;
  %1 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <16 x i16> %a0)
  ret <16 x i16> %1
}

define <32 x i16> @one_pmulhu_512(<32 x i16> %a0) {
; CHECK-LABEL: @one_pmulhu_512(
; CHECK-NEXT:    ret <32 x i16> zeroinitializer
;
  %1 = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> %a0, <32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <32 x i16> %1
}

define <32 x i16> @one_pmulhu_512_commute(<32 x i16> %a0) {
; CHECK-LABEL: @one_pmulhu_512_commute(
; CHECK-NEXT:    ret <32 x i16> zeroinitializer
;
  %1 = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <32 x i16> %a0)
  ret <32 x i16> %1
}

;
; Constant Folding
;
; With two constant operands every lane is expected to fold to
; (zext(a) * zext(b)) >> 16, printed as a signed i16. For example, in the
; 128-bit case lane 0 is 0xFFFF * 0xFFFB -> 0xFFFA (-6) and lane 2 is
; 3 * 0x8000 -> 1. The 512-bit case uses the 256-bit operands in both halves
; with the operand order swapped in the upper half, so the expected result
; repeats the 256-bit result twice.

define <8 x i16> @fold_pmulhu_128() {
; CHECK-LABEL: @fold_pmulhu_128(
; CHECK-NEXT:    ret <8 x i16> <i16 -6, i16 0, i16 1, i16 32763, i16 -14, i16 5, i16 3, i16 32757>
;
  %1 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> <i16 -1, i16 2, i16 3, i16 -4, i16 -5, i16 6, i16 7, i16 -8>, <8 x i16> <i16 -5, i16 7, i16 -32768, i16 32765, i16 -9, i16 -11, i16 -32763, i16 32761>)
  ret <8 x i16> %1
}

define <16 x i16> @fold_pmulhu_256() {
; CHECK-LABEL: @fold_pmulhu_256(
; CHECK-NEXT:    ret <16 x i16> <i16 0, i16 6, i16 1, i16 1, i16 -13, i16 -16, i16 3, i16 3, i16 12, i16 8, i16 -32766, i16 5, i16 16, i16 12, i16 -32764, i16 32748>
;
  %1 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> <i16 0, i16 -1, i16 2, i16 3, i16 -4, i16 -5, i16 6, i16 7, i16 -8, i16 9, i16 -10, i16 11, i16 -12, i16 13, i16 -14, i16 -15>, <16 x i16> <i16 -5, i16 7, i16 -32768, i16 32766, i16 -9, i16 -11, i16 -32764, i16 32762, i16 13, i16 -15, i16 -32760, i16 32758, i16 17, i16 -19, i16 -32756, i16 32756>)
  ret <16 x i16> %1
}

define <32 x i16> @fold_pmulhu_512() {
; CHECK-LABEL: @fold_pmulhu_512(
; CHECK-NEXT:    ret <32 x i16> <i16 0, i16 6, i16 1, i16 1, i16 -13, i16 -16, i16 3, i16 3, i16 12, i16 8, i16 -32766, i16 5, i16 16, i16 12, i16 -32764, i16 32748, i16 0, i16 6, i16 1, i16 1, i16 -13, i16 -16, i16 3, i16 3, i16 12, i16 8, i16 -32766, i16 5, i16 16, i16 12, i16 -32764, i16 32748>
;
  %1 = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> <i16 0, i16 -1, i16 2, i16 3, i16 -4, i16 -5, i16 6, i16 7, i16 -8, i16 9, i16 -10, i16 11, i16 -12, i16 13, i16 -14, i16 -15, i16 -5, i16 7, i16 -32768, i16 32766, i16 -9, i16 -11, i16 -32764, i16 32762, i16 13, i16 -15, i16 -32760, i16 32758, i16 17, i16 -19, i16 -32756, i16 32756>, <32 x i16> <i16 -5, i16 7, i16 -32768, i16 32766, i16 -9, i16 -11, i16 -32764, i16 32762, i16 13, i16 -15, i16 -32760, i16 32758, i16 17, i16 -19, i16 -32756, i16 32756, i16 0, i16 -1, i16 2, i16 3, i16 -4, i16 -5, i16 6, i16 7, i16 -8, i16 9, i16 -10, i16 11, i16 -12, i16 13, i16 -14, i16 -15>)
  ret <32 x i16> %1
}

;
; Demanded Elts
;
; The final shuffle splats lane 0 of the multiply, so only lane 0 of each
; operand is demanded. Both input shuffles keep lane 0 in place (their masks
; start with i32 0), so they are expected to be removed and the call fed
; directly from %a0 and %a1.

define <8 x i16> @elts_pmulhu_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: @elts_pmulhu_128(
; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]])
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> zeroinitializer
; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
;
  %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2>
  %2 = shufflevector <8 x i16> %a1, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
  %3 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %1, <8 x i16> %2)
  %4 = shufflevector <8 x i16> %3, <8 x i16> poison, <8 x i32> zeroinitializer
  ret <8 x i16> %4
}

define <16 x i16> @elts_pmulhu_256(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: @elts_pmulhu_256(
; CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]])
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> poison, <16 x i32> zeroinitializer
; CHECK-NEXT:    ret <16 x i16> [[TMP2]]
;
  %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %2 = shufflevector <16 x i16> %a1, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %3 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %1, <16 x i16> %2)
  %4 = shufflevector <16 x i16> %3, <16 x i16> poison, <16 x i32> zeroinitializer
  ret <16 x i16> %4
}

define <32 x i16> @elts_pmulhu_512(<32 x i16> %a0, <32 x i16> %a1) {
; CHECK-LABEL: @elts_pmulhu_512(
; CHECK-NEXT:    [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> [[A0:%.*]], <32 x i16> [[A1:%.*]])
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <32 x i16> [[TMP1]], <32 x i16> poison, <32 x i32> zeroinitializer
; CHECK-NEXT:    ret <32 x i16> [[TMP2]]
;
  %1 = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %2 = shufflevector <32 x i16> %a1, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %3 = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> %1, <32 x i16> %2)
  %4 = shufflevector <32 x i16> %3, <32 x i16> poison, <32 x i32> zeroinitializer
  ret <32 x i16> %4
}

;
; Known Bits
;
; Every %x0 lane is shifted right by at least 8, so it is below 2^8, and every
; %x1 lane is masked with 3, so it is below 2^2. The unsigned product is then
; below 2^10 and its upper 16 bits are zero, so the multiply is expected to be
; known zero and the trailing add to fold to %a2.

define <8 x i16> @known_pmulhu_128(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) {
; CHECK-LABEL: @known_pmulhu_128(
; CHECK-NEXT:    ret <8 x i16> [[A2:%.*]]
;
  %x0 = lshr <8 x i16> %a0, <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
  %x1 = and <8 x i16> %a1, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %m = tail call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %x0, <8 x i16> %x1)
  %r = add <8 x i16> %m, %a2
  ret <8 x i16> %r
}

define <16 x i16> @known_pmulhu_256(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> %a2) {
; CHECK-LABEL: @known_pmulhu_256(
; CHECK-NEXT:    ret <16 x i16> [[A2:%.*]]
;
  %x0 = lshr <16 x i16> %a0, <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
  %x1 = and <16 x i16> %a1, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %m = tail call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %x0, <16 x i16> %x1)
  %r = add <16 x i16> %m, %a2
  ret <16 x i16> %r
}

define <32 x i16> @known_pmulhu_512(<32 x i16> %a0, <32 x i16> %a1, <32 x i16> %a2) {
; CHECK-LABEL: @known_pmulhu_512(
; CHECK-NEXT:    ret <32 x i16> [[A2:%.*]]
;
  %x0 = lshr <32 x i16> %a0, <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
  %x1 = and <32 x i16> %a1, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %m = tail call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1)
  %r = add <32 x i16> %m, %a2
  ret <32 x i16> %r
}