; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
; RUN: opt < %s -S -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s

target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"

; Cost of llvm.vector.reduce.add on i8 vectors with 1, 2, 4, 8 and 16 lanes.
define void @add_i8() {
; CHECK-LABEL: 'add_i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a0 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a1 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a4 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
  %a0 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef)

  %a1 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)

  %a2 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)

  %a3 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)

  %a4 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)

  ret void
}

; Cost of i16 reductions: inputs zero-/sign-extended from i8 (%a0*-%a4*),
; followed by reductions of plain i16 vectors (%a5-%a9).
define void @add_i16() {
; CHECK-LABEL: 'add_i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0za = zext <1 x i8> undef to <1 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a0z = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0sa = sext <1 x i8> undef to <1 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a0s = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a1za = zext <2 x i8> undef to <2 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a1z = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a1sa = sext <2 x i8> undef to <2 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a1s = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2za = zext <4 x i8> undef to <4 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2z = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2sa = sext <4 x i8> undef to <4 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2s = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3za = zext <8 x i8> undef to <8 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3sa = sext <8 x i8> undef to <8 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3s = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a4za = zext <16 x i8> undef to <16 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a4z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a4sa = sext <16 x i8> undef to <16 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a4s = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a5 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a6 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a9 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
  %a0za = zext <1 x i8> undef to <1 x i16>
  %a0z = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0za)

  %a0sa = sext <1 x i8> undef to <1 x i16>
  %a0s = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0sa)

  %a1za = zext <2 x i8> undef to <2 x i16>
  %a1z = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1za)

  %a1sa = sext <2 x i8> undef to <2 x i16>
  %a1s = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1sa)

  %a2za = zext <4 x i8> undef to <4 x i16>
  %a2z = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2za)

  %a2sa = sext <4 x i8> undef to <4 x i16>
  %a2s = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2sa)

  %a3za = zext <8 x i8> undef to <8 x i16>
  %a3z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3za)

  %a3sa = sext <8 x i8> undef to <8 x i16>
  %a3s = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3sa)

  %a4za = zext <16 x i8> undef to <16 x i16>
  %a4z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4za)

  %a4sa = sext <16 x i8> undef to <16 x i16>
  %a4s = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4sa)

  %a5 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> undef)

  %a6 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)

  %a7 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)

  %a8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)

  %a9 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)

  ret void
}

; Cost of i32 reductions: inputs zero-/sign-extended from i8 (%a0*-%a4*) and
; from i16 (%a5*-%a9*), followed by reductions of plain i32 vectors (%a10-%a14).
define void @add_i32() {
; CHECK-LABEL: 'add_i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0za = zext <1 x i8> undef to <1 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a0z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0sa = sext <1 x i8> undef to <1 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a0s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a1za = zext <2 x i8> undef to <2 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a1z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a1sa = sext <2 x i8> undef to <2 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a1s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a2za = zext <4 x i8> undef to <4 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a2sa = sext <4 x i8> undef to <4 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a3za = zext <8 x i8> undef to <8 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a3z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a3sa = sext <8 x i8> undef to <8 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a3s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a4za = zext <16 x i8> undef to <16 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a4z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a4sa = sext <16 x i8> undef to <16 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a4s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a5za = zext <1 x i16> undef to <1 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a5z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a5sa = sext <1 x i16> undef to <1 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a5s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a6za = zext <2 x i16> undef to <2 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a6z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a6sa = sext <2 x i16> undef to <2 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a6s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7za = zext <4 x i16> undef to <4 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7sa = sext <4 x i16> undef to <4 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a8za = zext <8 x i16> undef to <8 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a8z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a8sa = sext <8 x i16> undef to <8 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a8s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %a9za = zext <16 x i16> undef to <16 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a9z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %a9sa = sext <16 x i16> undef to <16 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a9s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a10 = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a11 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a12 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a13 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a14 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
  %a0za = zext <1 x i8> undef to <1 x i32>
  %a0z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0za)

  %a0sa = sext <1 x i8> undef to <1 x i32>
  %a0s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0sa)

  %a1za = zext <2 x i8> undef to <2 x i32>
  %a1z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1za)

  %a1sa = sext <2 x i8> undef to <2 x i32>
  %a1s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1sa)

  %a2za = zext <4 x i8> undef to <4 x i32>
  %a2z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2za)

  %a2sa = sext <4 x i8> undef to <4 x i32>
  %a2s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2sa)

  %a3za = zext <8 x i8> undef to <8 x i32>
  %a3z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3za)

  %a3sa = sext <8 x i8> undef to <8 x i32>
  %a3s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3sa)

  %a4za = zext <16 x i8> undef to <16 x i32>
  %a4z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4za)

  %a4sa = sext <16 x i8> undef to <16 x i32>
  %a4s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4sa)

  %a5za = zext <1 x i16> undef to <1 x i32>
  %a5z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5za)

  %a5sa = sext <1 x i16> undef to <1 x i32>
  %a5s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5sa)

  %a6za = zext <2 x i16> undef to <2 x i32>
  %a6z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6za)

  %a6sa = sext <2 x i16> undef to <2 x i32>
  %a6s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6sa)

  %a7za = zext <4 x i16> undef to <4 x i32>
  %a7z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7za)

  %a7sa = sext <4 x i16> undef to <4 x i32>
  %a7s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7sa)

  %a8za = zext <8 x i16> undef to <8 x i32>
  %a8z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8za)

  %a8sa = sext <8 x i16> undef to <8 x i32>
  %a8s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8sa)

  %a9za = zext <16 x i16> undef to <16 x i32>
  %a9z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9za)

  %a9sa = sext <16 x i16> undef to <16 x i32>
  %a9s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9sa)

  %a10 = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> undef)

  %a11 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)

  %a12 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)

  %a13 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)

  %a14 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)

  ret void
}

; Cost of i64 reductions: inputs zero-/sign-extended from i8 (%a0*-%a4*),
; i16 (%a5*-%a9*) and i32 (%a10*-%a14*), followed by reductions of plain i64
; vectors (%a15-%a19).
define void @add_i64() {
; CHECK-LABEL: 'add_i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a0za = zext <1 x i8> undef to <1 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a0z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a0sa = sext <1 x i8> undef to <1 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a0s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a1za = zext <2 x i8> undef to <2 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a1z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %a1sa = sext <2 x i8> undef to <2 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a1s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a2za = zext <4 x i8> undef to <4 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a2z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %a2sa = sext <4 x i8> undef to <4 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a2s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a3za = zext <8 x i8> undef to <8 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a3z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %a3sa = sext <8 x i8> undef to <8 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a3s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 298 for instruction: %a4za = zext <16 x i8> undef to <16 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a4z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1322 for instruction: %a4sa = sext <16 x i8> undef to <16 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a4s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a5za = zext <1 x i16> undef to <1 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a5z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %a5sa = sext <1 x i16> undef to <1 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a5s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a6za = zext <2 x i16> undef to <2 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a6z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %a6sa = sext <2 x i16> undef to <2 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a6s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a7za = zext <4 x i16> undef to <4 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a7z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %a7sa = sext <4 x i16> undef to <4 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a7s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a8za = zext <8 x i16> undef to <8 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a8z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %a8sa = sext <8 x i16> undef to <8 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a8s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 296 for instruction: %a9za = zext <16 x i16> undef to <16 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a9z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %a9sa = sext <16 x i16> undef to <16 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a9s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a10za = zext <1 x i32> undef to <1 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a10z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a10sa = sext <1 x i32> undef to <1 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a10s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a11za = zext <2 x i32> undef to <2 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a11z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %a11sa = sext <2 x i32> undef to <2 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a11s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a12za = zext <4 x i32> undef to <4 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a12z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %a12sa = sext <4 x i32> undef to <4 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a12s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %a13za = zext <8 x i32> undef to <8 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a13z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %a13sa = sext <8 x i32> undef to <8 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a13s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %a14za = zext <16 x i32> undef to <16 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a14z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14za)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1056 for instruction: %a14sa = sext <16 x i32> undef to <16 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a14s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14sa)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a15 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a16 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a17 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a18 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a19 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
  %a0za = zext <1 x i8> undef to <1 x i64>
  %a0z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0za)

  %a0sa = sext <1 x i8> undef to <1 x i64>
  %a0s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0sa)

  %a1za = zext <2 x i8> undef to <2 x i64>
  %a1z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1za)

  %a1sa = sext <2 x i8> undef to <2 x i64>
  %a1s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1sa)

  %a2za = zext <4 x i8> undef to <4 x i64>
  %a2z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2za)

  %a2sa = sext <4 x i8> undef to <4 x i64>
  %a2s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2sa)

  %a3za = zext <8 x i8> undef to <8 x i64>
  %a3z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3za)

  %a3sa = sext <8 x i8> undef to <8 x i64>
  %a3s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3sa)

  %a4za = zext <16 x i8> undef to <16 x i64>
  %a4z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4za)

  %a4sa = sext <16 x i8> undef to <16 x i64>
  %a4s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4sa)

  %a5za = zext <1 x i16> undef to <1 x i64>
  %a5z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5za)

  %a5sa = sext <1 x i16> undef to <1 x i64>
  %a5s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5sa)

  %a6za = zext <2 x i16> undef to <2 x i64>
  %a6z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6za)

  %a6sa = sext <2 x i16> undef to <2 x i64>
  %a6s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6sa)

  %a7za = zext <4 x i16> undef to <4 x i64>
  %a7z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7za)

  %a7sa = sext <4 x i16> undef to <4 x i64>
  %a7s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7sa)

  %a8za = zext <8 x i16> undef to <8 x i64>
  %a8z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8za)

  %a8sa = sext <8 x i16> undef to <8 x i64>
  %a8s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8sa)

  %a9za = zext <16 x i16> undef to <16 x i64>
  %a9z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9za)

  %a9sa = sext <16 x i16> undef to <16 x i64>
  %a9s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9sa)

  %a10za = zext <1 x i32> undef to <1 x i64>
  %a10z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10za)

  %a10sa = sext <1 x i32> undef to <1 x i64>
  %a10s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10sa)

  %a11za = zext <2 x i32> undef to <2 x i64>
  %a11z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11za)

  %a11sa = sext <2 x i32> undef to <2 x i64>
  %a11s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11sa)

  %a12za = zext <4 x i32> undef to <4 x i64>
  %a12z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12za)

  %a12sa = sext <4 x i32> undef to <4 x i64>
  %a12s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12sa)

  %a13za = zext <8 x i32> undef to <8 x i64>
  %a13z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13za)

  %a13sa = sext <8 x i32> undef to <8 x i64>
  %a13s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13sa)

  %a14za = zext <16 x i32> undef to <16 x i64>
  %a14z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14za)

  %a14sa = sext <16 x i32> undef to <16 x i64>
  %a14s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14sa)

  %a15 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)

  %a16 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)

  %a17 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)

  %a18 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)

  %a19 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)

  ret void
}

; Cost of a vector mul whose result feeds llvm.vector.reduce.add, on i8
; vectors with 1, 2, 4, 8 and 16 lanes.
define void @mla_i8() {
; CHECK-LABEL: 'mla_i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0m = mul <1 x i8> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a0 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> %a0m)
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a1m = mul <2 x i8> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a1 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> %a1m)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2m = mul <4 x i8> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> %a2m)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3m = mul <8 x i8> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %a3m)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a4m = mul <16 x i8> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a4 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %a4m)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
  %a0m = mul <1 x i8> undef, undef
  %a0 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> %a0m)

  %a1m = mul <2 x i8> undef, undef
  %a1 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> %a1m)

  %a2m = mul <4 x i8> undef, undef
  %a2 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> %a2m)

  %a3m = mul <8 x i8> undef, undef
  %a3 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %a3m)

  %a4m = mul <16 x i8> undef, undef
  %a4 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %a4m)

  ret void
}

define void @mla_i16() {
; CHECK-LABEL: 'mla_i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0za = zext <1 x i8> undef to <1 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0zb = zext <1 x i8> undef to <1 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0zm = mul <1 x i16> %a0za, %a0zb
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for
instruction: %a0z = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0zm) 432; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0sa = sext <1 x i8> undef to <1 x i16> 433; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0sb = sext <1 x i8> undef to <1 x i16> 434; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0sm = mul <1 x i16> %a0sa, %a0sb 435; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a0s = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0sm) 436; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a1za = zext <2 x i8> undef to <2 x i16> 437; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a1zb = zext <2 x i8> undef to <2 x i16> 438; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %a1zm = mul <2 x i16> %a1za, %a1zb 439; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a1z = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1zm) 440; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a1sa = sext <2 x i8> undef to <2 x i16> 441; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a1sb = sext <2 x i8> undef to <2 x i16> 442; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %a1sm = mul <2 x i16> %a1sa, %a1sb 443; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a1s = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1sm) 444; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2za = zext <4 x i8> undef to <4 x i16> 445; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2zb = zext <4 x i8> undef to <4 x i16> 446; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2zm = mul <4 x i16> %a2za, %a2zb 447; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2z = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2zm) 
448; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2sa = sext <4 x i8> undef to <4 x i16> 449; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2sb = sext <4 x i8> undef to <4 x i16> 450; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2sm = mul <4 x i16> %a2sa, %a2sb 451; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2s = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2sm) 452; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3za = zext <8 x i8> undef to <8 x i16> 453; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3zb = zext <8 x i8> undef to <8 x i16> 454; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3zm = mul <8 x i16> %a3za, %a3zb 455; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3zm) 456; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3sa = sext <8 x i8> undef to <8 x i16> 457; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3sb = sext <8 x i8> undef to <8 x i16> 458; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3sm = mul <8 x i16> %a3sa, %a3sb 459; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3s = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3sm) 460; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a4za = zext <16 x i8> undef to <16 x i16> 461; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a4zb = zext <16 x i8> undef to <16 x i16> 462; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a4zm = mul <16 x i16> %a4za, %a4zb 463; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a4z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4zm) 464; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for 
instruction: %a4sa = sext <16 x i8> undef to <16 x i16> 465; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a4sb = sext <16 x i8> undef to <16 x i16> 466; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a4sm = mul <16 x i16> %a4sa, %a4sb 467; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a4s = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4sm) 468; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a5m = mul <1 x i16> undef, undef 469; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a5 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a5m) 470; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a6m = mul <2 x i16> undef, undef 471; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a6 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a6m) 472; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7m = mul <4 x i16> undef, undef 473; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a7m) 474; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a8m = mul <8 x i16> undef, undef 475; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a8m) 476; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a9m = mul <16 x i16> undef, undef 477; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a9 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a9m) 478; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void 479; 480 %a0za = zext <1 x i8> undef to <1 x i16> 481 %a0zb = zext <1 x i8> undef to <1 x i16> 482 %a0zm = mul <1 x i16> %a0za, %a0zb 483 %a0z = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0zm) 484 485 %a0sa = sext <1 x i8> undef to <1 x i16> 486 %a0sb 
= sext <1 x i8> undef to <1 x i16> 487 %a0sm = mul <1 x i16> %a0sa, %a0sb 488 %a0s = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0sm) 489 490 %a1za = zext <2 x i8> undef to <2 x i16> 491 %a1zb = zext <2 x i8> undef to <2 x i16> 492 %a1zm = mul <2 x i16> %a1za, %a1zb 493 %a1z = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1zm) 494 495 %a1sa = sext <2 x i8> undef to <2 x i16> 496 %a1sb = sext <2 x i8> undef to <2 x i16> 497 %a1sm = mul <2 x i16> %a1sa, %a1sb 498 %a1s = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1sm) 499 500 %a2za = zext <4 x i8> undef to <4 x i16> 501 %a2zb = zext <4 x i8> undef to <4 x i16> 502 %a2zm = mul <4 x i16> %a2za, %a2zb 503 %a2z = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2zm) 504 505 %a2sa = sext <4 x i8> undef to <4 x i16> 506 %a2sb = sext <4 x i8> undef to <4 x i16> 507 %a2sm = mul <4 x i16> %a2sa, %a2sb 508 %a2s = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2sm) 509 510 %a3za = zext <8 x i8> undef to <8 x i16> 511 %a3zb = zext <8 x i8> undef to <8 x i16> 512 %a3zm = mul <8 x i16> %a3za, %a3zb 513 %a3z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3zm) 514 515 %a3sa = sext <8 x i8> undef to <8 x i16> 516 %a3sb = sext <8 x i8> undef to <8 x i16> 517 %a3sm = mul <8 x i16> %a3sa, %a3sb 518 %a3s = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3sm) 519 520 %a4za = zext <16 x i8> undef to <16 x i16> 521 %a4zb = zext <16 x i8> undef to <16 x i16> 522 %a4zm = mul <16 x i16> %a4za, %a4zb 523 %a4z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4zm) 524 525 %a4sa = sext <16 x i8> undef to <16 x i16> 526 %a4sb = sext <16 x i8> undef to <16 x i16> 527 %a4sm = mul <16 x i16> %a4sa, %a4sb 528 %a4s = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4sm) 529 530 %a5m = mul <1 x i16> undef, undef 531 %a5 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a5m) 532 533 %a6m = mul <2 x i16> undef, undef 534 %a6 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a6m) 535 536 %a7m = mul <4 x i16> 
undef, undef 537 %a7 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a7m) 538 539 %a8m = mul <8 x i16> undef, undef 540 %a8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a8m) 541 542 %a9m = mul <16 x i16> undef, undef 543 %a9 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a9m) 544 545 ret void 546} 547
; mla_i32: cost-model coverage for multiply-then-add-reduce patterns that
; produce an i32 result. Each group below takes two undef vectors, multiplies
; them as <N x i32>, and passes the product to llvm.vector.reduce.add, for
; N = 1, 2, 4, 8 and 16:
;   * %a0..%a4  - operands are zext/sext from <N x i8>
;   * %a5..%a9  - operands are zext/sext from <N x i16>
;   * %a10..%a14 - operands are plain <N x i32> (no extension)
; The expected-cost assertions that follow the function header are generated by
; utils/update_analyze_test_checks.py (see the NOTE at the top of the file);
; regenerate them with that script instead of editing the numbers by hand.
548define void @mla_i32() { 549; CHECK-LABEL: 'mla_i32' 550; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0za = zext <1 x i8> undef to <1 x i32> 551; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0zb = zext <1 x i8> undef to <1 x i32> 552; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0zm = mul <1 x i32> %a0za, %a0zb 553; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a0z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0zm) 554; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0sa = sext <1 x i8> undef to <1 x i32> 555; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0sb = sext <1 x i8> undef to <1 x i32> 556; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0sm = mul <1 x i32> %a0sa, %a0sb 557; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a0s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0sm) 558; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a1za = zext <2 x i8> undef to <2 x i32> 559; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a1zb = zext <2 x i8> undef to <2 x i32> 560; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %a1zm = mul <2 x i32> %a1za, %a1zb 561; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a1z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1zm) 562; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a1sa = sext <2 x i8> undef to <2 x i32> 563; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for 
instruction: %a1sb = sext <2 x i8> undef to <2 x i32> 564; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %a1sm = mul <2 x i32> %a1sa, %a1sb 565; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a1s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1sm) 566; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a2za = zext <4 x i8> undef to <4 x i32> 567; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a2zb = zext <4 x i8> undef to <4 x i32> 568; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2zm = mul <4 x i32> %a2za, %a2zb 569; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2zm) 570; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a2sa = sext <4 x i8> undef to <4 x i32> 571; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a2sb = sext <4 x i8> undef to <4 x i32> 572; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2sm = mul <4 x i32> %a2sa, %a2sb 573; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2sm) 574; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a3za = zext <8 x i8> undef to <8 x i32> 575; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a3zb = zext <8 x i8> undef to <8 x i32> 576; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a3zm = mul <8 x i32> %a3za, %a3zb 577; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a3z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3zm) 578; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a3sa = sext <8 x i8> undef to <8 x i32> 579; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a3sb = sext <8 x i8> undef to <8 x i32> 580; CHECK-NEXT: 
Cost Model: Found an estimated cost of 4 for instruction: %a3sm = mul <8 x i32> %a3sa, %a3sb 581; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a3s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3sm) 582; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a4za = zext <16 x i8> undef to <16 x i32> 583; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a4zb = zext <16 x i8> undef to <16 x i32> 584; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a4zm = mul <16 x i32> %a4za, %a4zb 585; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a4z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4zm) 586; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a4sa = sext <16 x i8> undef to <16 x i32> 587; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a4sb = sext <16 x i8> undef to <16 x i32> 588; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a4sm = mul <16 x i32> %a4sa, %a4sb 589; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a4s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4sm) 590; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a5za = zext <1 x i16> undef to <1 x i32> 591; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a5zb = zext <1 x i16> undef to <1 x i32> 592; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a5zm = mul <1 x i32> %a5za, %a5zb 593; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a5z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5zm) 594; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a5sa = sext <1 x i16> undef to <1 x i32> 595; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a5sb = sext <1 x i16> undef to <1 x i32> 596; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: 
%a5sm = mul <1 x i32> %a5sa, %a5sb 597; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a5s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5sm) 598; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a6za = zext <2 x i16> undef to <2 x i32> 599; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a6zb = zext <2 x i16> undef to <2 x i32> 600; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %a6zm = mul <2 x i32> %a6za, %a6zb 601; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a6z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6zm) 602; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a6sa = sext <2 x i16> undef to <2 x i32> 603; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a6sb = sext <2 x i16> undef to <2 x i32> 604; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %a6sm = mul <2 x i32> %a6sa, %a6sb 605; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a6s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6sm) 606; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7za = zext <4 x i16> undef to <4 x i32> 607; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7zb = zext <4 x i16> undef to <4 x i32> 608; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7zm = mul <4 x i32> %a7za, %a7zb 609; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7zm) 610; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7sa = sext <4 x i16> undef to <4 x i32> 611; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7sb = sext <4 x i16> undef to <4 x i32> 612; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7sm = mul <4 x i32> %a7sa, %a7sb 613; CHECK-NEXT: Cost Model: 
Found an estimated cost of 2 for instruction: %a7s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7sm) 614; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a8za = zext <8 x i16> undef to <8 x i32> 615; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a8zb = zext <8 x i16> undef to <8 x i32> 616; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a8zm = mul <8 x i32> %a8za, %a8zb 617; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a8z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8zm) 618; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a8sa = sext <8 x i16> undef to <8 x i32> 619; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a8sb = sext <8 x i16> undef to <8 x i32> 620; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a8sm = mul <8 x i32> %a8sa, %a8sb 621; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a8s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8sm) 622; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %a9za = zext <16 x i16> undef to <16 x i32> 623; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %a9zb = zext <16 x i16> undef to <16 x i32> 624; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a9zm = mul <16 x i32> %a9za, %a9zb 625; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a9z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9zm) 626; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %a9sa = sext <16 x i16> undef to <16 x i32> 627; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %a9sb = sext <16 x i16> undef to <16 x i32> 628; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a9sm = mul <16 x i32> %a9sa, %a9sb 629; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a9s 
= call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9sm) 630; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a10m = mul <1 x i32> undef, undef 631; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a10 = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a10m) 632; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a11m = mul <2 x i32> undef, undef 633; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a11 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a11m) 634; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a12m = mul <4 x i32> undef, undef 635; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a12 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a12m) 636; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a13m = mul <8 x i32> undef, undef 637; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a13 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a13m) 638; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a14m = mul <16 x i32> undef, undef 639; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a14 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a14m) 640; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void 641; 642 %a0za = zext <1 x i8> undef to <1 x i32> 643 %a0zb = zext <1 x i8> undef to <1 x i32> 644 %a0zm = mul <1 x i32> %a0za, %a0zb 645 %a0z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0zm) 646 647 %a0sa = sext <1 x i8> undef to <1 x i32> 648 %a0sb = sext <1 x i8> undef to <1 x i32> 649 %a0sm = mul <1 x i32> %a0sa, %a0sb 650 %a0s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0sm) 651 652 %a1za = zext <2 x i8> undef to <2 x i32> 653 %a1zb = zext <2 x i8> undef to <2 x i32> 654 %a1zm = mul <2 x i32> %a1za, %a1zb 655 %a1z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1zm) 656 657 
%a1sa = sext <2 x i8> undef to <2 x i32> 658 %a1sb = sext <2 x i8> undef to <2 x i32> 659 %a1sm = mul <2 x i32> %a1sa, %a1sb 660 %a1s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1sm) 661 662 %a2za = zext <4 x i8> undef to <4 x i32> 663 %a2zb = zext <4 x i8> undef to <4 x i32> 664 %a2zm = mul <4 x i32> %a2za, %a2zb 665 %a2z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2zm) 666 667 %a2sa = sext <4 x i8> undef to <4 x i32> 668 %a2sb = sext <4 x i8> undef to <4 x i32> 669 %a2sm = mul <4 x i32> %a2sa, %a2sb 670 %a2s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2sm) 671 672 %a3za = zext <8 x i8> undef to <8 x i32> 673 %a3zb = zext <8 x i8> undef to <8 x i32> 674 %a3zm = mul <8 x i32> %a3za, %a3zb 675 %a3z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3zm) 676 677 %a3sa = sext <8 x i8> undef to <8 x i32> 678 %a3sb = sext <8 x i8> undef to <8 x i32> 679 %a3sm = mul <8 x i32> %a3sa, %a3sb 680 %a3s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3sm) 681 682 %a4za = zext <16 x i8> undef to <16 x i32> 683 %a4zb = zext <16 x i8> undef to <16 x i32> 684 %a4zm = mul <16 x i32> %a4za, %a4zb 685 %a4z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4zm) 686 687 %a4sa = sext <16 x i8> undef to <16 x i32> 688 %a4sb = sext <16 x i8> undef to <16 x i32> 689 %a4sm = mul <16 x i32> %a4sa, %a4sb 690 %a4s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4sm) 691 692 %a5za = zext <1 x i16> undef to <1 x i32> 693 %a5zb = zext <1 x i16> undef to <1 x i32> 694 %a5zm = mul <1 x i32> %a5za, %a5zb 695 %a5z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5zm) 696 697 %a5sa = sext <1 x i16> undef to <1 x i32> 698 %a5sb = sext <1 x i16> undef to <1 x i32> 699 %a5sm = mul <1 x i32> %a5sa, %a5sb 700 %a5s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5sm) 701 702 %a6za = zext <2 x i16> undef to <2 x i32> 703 %a6zb = zext <2 x i16> undef to <2 x i32> 704 %a6zm = mul <2 x i32> %a6za, %a6zb 705 %a6z = call i32 @llvm.vector.reduce.add.v2i32(<2 x 
i32> %a6zm) 706 707 %a6sa = sext <2 x i16> undef to <2 x i32> 708 %a6sb = sext <2 x i16> undef to <2 x i32> 709 %a6sm = mul <2 x i32> %a6sa, %a6sb 710 %a6s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6sm) 711 712 %a7za = zext <4 x i16> undef to <4 x i32> 713 %a7zb = zext <4 x i16> undef to <4 x i32> 714 %a7zm = mul <4 x i32> %a7za, %a7zb 715 %a7z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7zm) 716 717 %a7sa = sext <4 x i16> undef to <4 x i32> 718 %a7sb = sext <4 x i16> undef to <4 x i32> 719 %a7sm = mul <4 x i32> %a7sa, %a7sb 720 %a7s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7sm) 721 722 %a8za = zext <8 x i16> undef to <8 x i32> 723 %a8zb = zext <8 x i16> undef to <8 x i32> 724 %a8zm = mul <8 x i32> %a8za, %a8zb 725 %a8z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8zm) 726 727 %a8sa = sext <8 x i16> undef to <8 x i32> 728 %a8sb = sext <8 x i16> undef to <8 x i32> 729 %a8sm = mul <8 x i32> %a8sa, %a8sb 730 %a8s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8sm) 731 732 %a9za = zext <16 x i16> undef to <16 x i32> 733 %a9zb = zext <16 x i16> undef to <16 x i32> 734 %a9zm = mul <16 x i32> %a9za, %a9zb 735 %a9z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9zm) 736 737 %a9sa = sext <16 x i16> undef to <16 x i32> 738 %a9sb = sext <16 x i16> undef to <16 x i32> 739 %a9sm = mul <16 x i32> %a9sa, %a9sb 740 %a9s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9sm) 741 742 %a10m = mul <1 x i32> undef, undef 743 %a10 = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a10m) 744 745 %a11m = mul <2 x i32> undef, undef 746 %a11 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a11m) 747 748 %a12m = mul <4 x i32> undef, undef 749 %a12 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a12m) 750 751 %a13m = mul <8 x i32> undef, undef 752 %a13 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a13m) 753 754 %a14m = mul <16 x i32> undef, undef 755 %a14 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a14m) 756 
757 ret void 758} 759 760define void @mla_i64() { 761; CHECK-LABEL: 'mla_i64' 762; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a0za = zext <1 x i8> undef to <1 x i64> 763; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a0zb = zext <1 x i8> undef to <1 x i64> 764; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a0zm = mul <1 x i64> %a0za, %a0zb 765; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a0z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0zm) 766; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a0sa = sext <1 x i8> undef to <1 x i64> 767; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a0sb = sext <1 x i8> undef to <1 x i64> 768; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a0sm = mul <1 x i64> %a0sa, %a0sb 769; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a0s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0sm) 770; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a1za = zext <2 x i8> undef to <2 x i64> 771; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a1zb = zext <2 x i8> undef to <2 x i64> 772; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %a1zm = mul <2 x i64> %a1za, %a1zb 773; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a1z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1zm) 774; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %a1sa = sext <2 x i8> undef to <2 x i64> 775; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %a1sb = sext <2 x i8> undef to <2 x i64> 776; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %a1sm = mul <2 x i64> %a1sa, %a1sb 777; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a1s = call i64 @llvm.vector.reduce.add.v2i64(<2 x 
i64> %a1sm) 778; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a2za = zext <4 x i8> undef to <4 x i64> 779; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a2zb = zext <4 x i8> undef to <4 x i64> 780; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %a2zm = mul <4 x i64> %a2za, %a2zb 781; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a2z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2zm) 782; CHECK-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %a2sa = sext <4 x i8> undef to <4 x i64> 783; CHECK-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %a2sb = sext <4 x i8> undef to <4 x i64> 784; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %a2sm = mul <4 x i64> %a2sa, %a2sb 785; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a2s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2sm) 786; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a3za = zext <8 x i8> undef to <8 x i64> 787; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a3zb = zext <8 x i8> undef to <8 x i64> 788; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a3zm = mul <8 x i64> %a3za, %a3zb 789; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a3z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3zm) 790; CHECK-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %a3sa = sext <8 x i8> undef to <8 x i64> 791; CHECK-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %a3sb = sext <8 x i8> undef to <8 x i64> 792; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a3sm = mul <8 x i64> %a3sa, %a3sb 793; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a3s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3sm) 794; CHECK-NEXT: Cost Model: Found an 
estimated cost of 298 for instruction: %a4za = zext <16 x i8> undef to <16 x i64> 795; CHECK-NEXT: Cost Model: Found an estimated cost of 298 for instruction: %a4zb = zext <16 x i8> undef to <16 x i64> 796; CHECK-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %a4zm = mul <16 x i64> %a4za, %a4zb 797; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a4z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4zm) 798; CHECK-NEXT: Cost Model: Found an estimated cost of 1322 for instruction: %a4sa = sext <16 x i8> undef to <16 x i64> 799; CHECK-NEXT: Cost Model: Found an estimated cost of 1322 for instruction: %a4sb = sext <16 x i8> undef to <16 x i64> 800; CHECK-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %a4sm = mul <16 x i64> %a4sa, %a4sb 801; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a4s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4sm) 802; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a5za = zext <1 x i16> undef to <1 x i64> 803; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a5zb = zext <1 x i16> undef to <1 x i64> 804; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a5zm = mul <1 x i64> %a5za, %a5zb 805; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a5z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5zm) 806; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %a5sa = sext <1 x i16> undef to <1 x i64> 807; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %a5sb = sext <1 x i16> undef to <1 x i64> 808; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a5sm = mul <1 x i64> %a5sa, %a5sb 809; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a5s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5sm) 810; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: 
%a6za = zext <2 x i16> undef to <2 x i64> 811; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a6zb = zext <2 x i16> undef to <2 x i64> 812; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %a6zm = mul <2 x i64> %a6za, %a6zb 813; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a6z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6zm) 814; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %a6sa = sext <2 x i16> undef to <2 x i64> 815; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %a6sb = sext <2 x i16> undef to <2 x i64> 816; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %a6sm = mul <2 x i64> %a6sa, %a6sb 817; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a6s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6sm) 818; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a7za = zext <4 x i16> undef to <4 x i64> 819; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a7zb = zext <4 x i16> undef to <4 x i64> 820; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %a7zm = mul <4 x i64> %a7za, %a7zb 821; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a7z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7zm) 822; CHECK-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %a7sa = sext <4 x i16> undef to <4 x i64> 823; CHECK-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %a7sb = sext <4 x i16> undef to <4 x i64> 824; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %a7sm = mul <4 x i64> %a7sa, %a7sb 825; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a7s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7sm) 826; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a8za = zext <8 x i16> undef to <8 x i64> 827; 
CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a8zb = zext <8 x i16> undef to <8 x i64> 828; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a8zm = mul <8 x i64> %a8za, %a8zb 829; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a8z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8zm) 830; CHECK-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %a8sa = sext <8 x i16> undef to <8 x i64> 831; CHECK-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %a8sb = sext <8 x i16> undef to <8 x i64> 832; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a8sm = mul <8 x i64> %a8sa, %a8sb 833; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a8s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8sm) 834; CHECK-NEXT: Cost Model: Found an estimated cost of 296 for instruction: %a9za = zext <16 x i16> undef to <16 x i64> 835; CHECK-NEXT: Cost Model: Found an estimated cost of 296 for instruction: %a9zb = zext <16 x i16> undef to <16 x i64> 836; CHECK-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %a9zm = mul <16 x i64> %a9za, %a9zb 837; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a9z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9zm) 838; CHECK-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %a9sa = sext <16 x i16> undef to <16 x i64> 839; CHECK-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %a9sb = sext <16 x i16> undef to <16 x i64> 840; CHECK-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %a9sm = mul <16 x i64> %a9sa, %a9sb 841; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a9s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9sm) 842; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a10za = zext <1 x i32> undef to <1 x i64> 843; CHECK-NEXT: Cost Model: 
Found an estimated cost of 34 for instruction: %a10zb = zext <1 x i32> undef to <1 x i64> 844; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a10zm = mul <1 x i64> %a10za, %a10zb 845; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a10z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10zm) 846; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a10sa = sext <1 x i32> undef to <1 x i64> 847; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a10sb = sext <1 x i32> undef to <1 x i64> 848; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a10sm = mul <1 x i64> %a10sa, %a10sb 849; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a10s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10sm) 850; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a11za = zext <2 x i32> undef to <2 x i64> 851; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a11zb = zext <2 x i32> undef to <2 x i64> 852; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %a11zm = mul <2 x i64> %a11za, %a11zb 853; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a11z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11zm) 854; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %a11sa = sext <2 x i32> undef to <2 x i64> 855; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %a11sb = sext <2 x i32> undef to <2 x i64> 856; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %a11sm = mul <2 x i64> %a11sa, %a11sb 857; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a11s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11sm) 858; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a12za = zext <4 x i32> undef to <4 x i64> 859; CHECK-NEXT: Cost Model: Found an estimated cost of 18 
for instruction: %a12zb = zext <4 x i32> undef to <4 x i64> 860; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %a12zm = mul <4 x i64> %a12za, %a12zb 861; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a12z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12zm) 862; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %a12sa = sext <4 x i32> undef to <4 x i64> 863; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %a12sb = sext <4 x i32> undef to <4 x i64> 864; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %a12sm = mul <4 x i64> %a12sa, %a12sb 865; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a12s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12sm) 866; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %a13za = zext <8 x i32> undef to <8 x i64> 867; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %a13zb = zext <8 x i32> undef to <8 x i64> 868; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a13zm = mul <8 x i64> %a13za, %a13zb 869; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a13z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13zm) 870; CHECK-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %a13sa = sext <8 x i32> undef to <8 x i64> 871; CHECK-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %a13sb = sext <8 x i32> undef to <8 x i64> 872; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a13sm = mul <8 x i64> %a13sa, %a13sb 873; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a13s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13sm) 874; CHECK-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %a14za = zext <16 x i32> undef to <16 x i64> 875; CHECK-NEXT: Cost Model: Found an estimated cost of 288 for 
instruction: %a14zb = zext <16 x i32> undef to <16 x i64> 876; CHECK-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %a14zm = mul <16 x i64> %a14za, %a14zb 877; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a14z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14zm) 878; CHECK-NEXT: Cost Model: Found an estimated cost of 1056 for instruction: %a14sa = sext <16 x i32> undef to <16 x i64> 879; CHECK-NEXT: Cost Model: Found an estimated cost of 1056 for instruction: %a14sb = sext <16 x i32> undef to <16 x i64> 880; CHECK-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %a14sm = mul <16 x i64> %a14sa, %a14sb 881; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a14s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14sm) 882; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a15m = mul <1 x i64> undef, undef 883; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a15 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a15m) 884; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %a16m = mul <2 x i64> undef, undef 885; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a16 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a16m) 886; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %a17m = mul <4 x i64> undef, undef 887; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a17 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a17m) 888; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %a18m = mul <8 x i64> undef, undef 889; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a18 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a18m) 890; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %a19m = mul <16 x i64> undef, undef 891; CHECK-NEXT: Cost Model: Found an estimated cost of 808 
for instruction: %a19 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a19m) 892; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void 893; 894 %a0za = zext <1 x i8> undef to <1 x i64> 895 %a0zb = zext <1 x i8> undef to <1 x i64> 896 %a0zm = mul <1 x i64> %a0za, %a0zb 897 %a0z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0zm) 898 899 %a0sa = sext <1 x i8> undef to <1 x i64> 900 %a0sb = sext <1 x i8> undef to <1 x i64> 901 %a0sm = mul <1 x i64> %a0sa, %a0sb 902 %a0s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0sm) 903 904 %a1za = zext <2 x i8> undef to <2 x i64> 905 %a1zb = zext <2 x i8> undef to <2 x i64> 906 %a1zm = mul <2 x i64> %a1za, %a1zb 907 %a1z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1zm) 908 909 %a1sa = sext <2 x i8> undef to <2 x i64> 910 %a1sb = sext <2 x i8> undef to <2 x i64> 911 %a1sm = mul <2 x i64> %a1sa, %a1sb 912 %a1s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1sm) 913 914 %a2za = zext <4 x i8> undef to <4 x i64> 915 %a2zb = zext <4 x i8> undef to <4 x i64> 916 %a2zm = mul <4 x i64> %a2za, %a2zb 917 %a2z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2zm) 918 919 %a2sa = sext <4 x i8> undef to <4 x i64> 920 %a2sb = sext <4 x i8> undef to <4 x i64> 921 %a2sm = mul <4 x i64> %a2sa, %a2sb 922 %a2s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2sm) 923 924 %a3za = zext <8 x i8> undef to <8 x i64> 925 %a3zb = zext <8 x i8> undef to <8 x i64> 926 %a3zm = mul <8 x i64> %a3za, %a3zb 927 %a3z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3zm) 928 929 %a3sa = sext <8 x i8> undef to <8 x i64> 930 %a3sb = sext <8 x i8> undef to <8 x i64> 931 %a3sm = mul <8 x i64> %a3sa, %a3sb 932 %a3s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3sm) 933 934 %a4za = zext <16 x i8> undef to <16 x i64> 935 %a4zb = zext <16 x i8> undef to <16 x i64> 936 %a4zm = mul <16 x i64> %a4za, %a4zb 937 %a4z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4zm) 938 939 %a4sa = sext <16 
x i8> undef to <16 x i64> 940 %a4sb = sext <16 x i8> undef to <16 x i64> 941 %a4sm = mul <16 x i64> %a4sa, %a4sb 942 %a4s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4sm) 943 944 %a5za = zext <1 x i16> undef to <1 x i64> 945 %a5zb = zext <1 x i16> undef to <1 x i64> 946 %a5zm = mul <1 x i64> %a5za, %a5zb 947 %a5z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5zm) 948 949 %a5sa = sext <1 x i16> undef to <1 x i64> 950 %a5sb = sext <1 x i16> undef to <1 x i64> 951 %a5sm = mul <1 x i64> %a5sa, %a5sb 952 %a5s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5sm) 953 954 %a6za = zext <2 x i16> undef to <2 x i64> 955 %a6zb = zext <2 x i16> undef to <2 x i64> 956 %a6zm = mul <2 x i64> %a6za, %a6zb 957 %a6z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6zm) 958 959 %a6sa = sext <2 x i16> undef to <2 x i64> 960 %a6sb = sext <2 x i16> undef to <2 x i64> 961 %a6sm = mul <2 x i64> %a6sa, %a6sb 962 %a6s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6sm) 963 964 %a7za = zext <4 x i16> undef to <4 x i64> 965 %a7zb = zext <4 x i16> undef to <4 x i64> 966 %a7zm = mul <4 x i64> %a7za, %a7zb 967 %a7z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7zm) 968 969 %a7sa = sext <4 x i16> undef to <4 x i64> 970 %a7sb = sext <4 x i16> undef to <4 x i64> 971 %a7sm = mul <4 x i64> %a7sa, %a7sb 972 %a7s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7sm) 973 974 %a8za = zext <8 x i16> undef to <8 x i64> 975 %a8zb = zext <8 x i16> undef to <8 x i64> 976 %a8zm = mul <8 x i64> %a8za, %a8zb 977 %a8z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8zm) 978 979 %a8sa = sext <8 x i16> undef to <8 x i64> 980 %a8sb = sext <8 x i16> undef to <8 x i64> 981 %a8sm = mul <8 x i64> %a8sa, %a8sb 982 %a8s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8sm) 983 984 %a9za = zext <16 x i16> undef to <16 x i64> 985 %a9zb = zext <16 x i16> undef to <16 x i64> 986 %a9zm = mul <16 x i64> %a9za, %a9zb 987 %a9z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> 
%a9zm) 988 989 %a9sa = sext <16 x i16> undef to <16 x i64> 990 %a9sb = sext <16 x i16> undef to <16 x i64> 991 %a9sm = mul <16 x i64> %a9sa, %a9sb 992 %a9s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9sm) 993 994 %a10za = zext <1 x i32> undef to <1 x i64> 995 %a10zb = zext <1 x i32> undef to <1 x i64> 996 %a10zm = mul <1 x i64> %a10za, %a10zb 997 %a10z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10zm) 998 999 %a10sa = sext <1 x i32> undef to <1 x i64> 1000 %a10sb = sext <1 x i32> undef to <1 x i64> 1001 %a10sm = mul <1 x i64> %a10sa, %a10sb 1002 %a10s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10sm) 1003 1004 %a11za = zext <2 x i32> undef to <2 x i64> 1005 %a11zb = zext <2 x i32> undef to <2 x i64> 1006 %a11zm = mul <2 x i64> %a11za, %a11zb 1007 %a11z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11zm) 1008 1009 %a11sa = sext <2 x i32> undef to <2 x i64> 1010 %a11sb = sext <2 x i32> undef to <2 x i64> 1011 %a11sm = mul <2 x i64> %a11sa, %a11sb 1012 %a11s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11sm) 1013 1014 %a12za = zext <4 x i32> undef to <4 x i64> 1015 %a12zb = zext <4 x i32> undef to <4 x i64> 1016 %a12zm = mul <4 x i64> %a12za, %a12zb 1017 %a12z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12zm) 1018 1019 %a12sa = sext <4 x i32> undef to <4 x i64> 1020 %a12sb = sext <4 x i32> undef to <4 x i64> 1021 %a12sm = mul <4 x i64> %a12sa, %a12sb 1022 %a12s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12sm) 1023 1024 %a13za = zext <8 x i32> undef to <8 x i64> 1025 %a13zb = zext <8 x i32> undef to <8 x i64> 1026 %a13zm = mul <8 x i64> %a13za, %a13zb 1027 %a13z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13zm) 1028 1029 %a13sa = sext <8 x i32> undef to <8 x i64> 1030 %a13sb = sext <8 x i32> undef to <8 x i64> 1031 %a13sm = mul <8 x i64> %a13sa, %a13sb 1032 %a13s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13sm) 1033 1034 %a14za = zext <16 x i32> undef to <16 x i64> 1035 %a14zb = zext <16 
x i32> undef to <16 x i64> 1036 %a14zm = mul <16 x i64> %a14za, %a14zb 1037 %a14z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14zm) 1038 1039 %a14sa = sext <16 x i32> undef to <16 x i64> 1040 %a14sb = sext <16 x i32> undef to <16 x i64> 1041 %a14sm = mul <16 x i64> %a14sa, %a14sb 1042 %a14s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14sm) 1043 1044 %a15m = mul <1 x i64> undef, undef 1045 %a15 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a15m) 1046 1047 %a16m = mul <2 x i64> undef, undef 1048 %a16 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a16m) 1049 1050 %a17m = mul <4 x i64> undef, undef 1051 %a17 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a17m) 1052 1053 %a18m = mul <8 x i64> undef, undef 1054 %a18 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a18m) 1055 1056 %a19m = mul <16 x i64> undef, undef 1057 %a19 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a19m) 1058 1059 ret void 1060} 1061 1062declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>) 1063declare i16 @llvm.vector.reduce.add.v1i16(<1 x i16>) 1064declare i16 @llvm.vector.reduce.add.v2i16(<2 x i16>) 1065declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>) 1066declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>) 1067declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>) 1068declare i32 @llvm.vector.reduce.add.v1i32(<1 x i32>) 1069declare i32 @llvm.vector.reduce.add.v2i32(<2 x i32>) 1070declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) 1071declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>) 1072declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>) 1073declare i64 @llvm.vector.reduce.add.v1i64(<1 x i64>) 1074declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>) 1075declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>) 1076declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>) 1077declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>) 1078declare i8 @llvm.vector.reduce.add.v1i8(<1 x i8>) 1079declare i8 @llvm.vector.reduce.add.v2i8(<2 x i8>) 1080declare 
i8 @llvm.vector.reduce.add.v4i8(<4 x i8>) 1081declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8>) 1082