; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -mtriple=thumbv7-unknown-linux-gnueabihf -arm-parallel-dsp -dce %s -S -o - | FileCheck %s

; Regression inputs for the -arm-parallel-dsp transform, run through opt
; together with -dce (see the command line above).
;
; Every function below sums products of sign-extended i16 loads taken from two
; pointers, one read at decreasing element offsets and the other at increasing
; element offsets. The expected output keeps the leading operations as scalar
; code and replaces the trailing pairs of multiplies with calls to
; @llvm.arm.smladx (i32 sums) or @llvm.arm.smlaldx (i64 sums) whose data
; operands are i32 loads.

; i32 sum seeded by the result of @bar(in[0], b[0]). The expected output leaves
; the in[-1] * b[1] multiply and its add with the call result as scalar code,
; passes that sum as the accumulator operand of the first @llvm.arm.smladx
; call, and chains a second @llvm.arm.smladx call onto the first.
define i32 @first_mul_invalid(ptr nocapture readonly %in, ptr nocapture readonly %b) {
; CHECK-LABEL: @first_mul_invalid(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr [[IN:%.*]], align 2
; CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[TMP0]] to i32
; CHECK-NEXT:    [[TMP1:%.*]] = load i16, ptr [[B:%.*]], align 2
; CHECK-NEXT:    [[CONV2:%.*]] = sext i16 [[TMP1]] to i32
; CHECK-NEXT:    [[CALL:%.*]] = tail call i32 @bar(i32 [[CONV]], i32 [[CONV2]])
; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[IN]], i32 -1
; CHECK-NEXT:    [[TMP2:%.*]] = load i16, ptr [[ARRAYIDX3]], align 2
; CHECK-NEXT:    [[CONV4:%.*]] = sext i16 [[TMP2]] to i32
; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 1
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX5]], align 2
; CHECK-NEXT:    [[CONV6:%.*]] = sext i16 [[TMP3]] to i32
; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[CONV6]], [[CONV4]]
; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[MUL]], [[CALL]]
; CHECK-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds i16, ptr [[IN]], i32 -3
; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX13]], align 2
; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 2
; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX9]], align 2
; CHECK-NEXT:    [[TMP8:%.*]] = call i32 @llvm.arm.smladx(i32 [[TMP5]], i32 [[TMP7]], i32 [[ADD]])
; CHECK-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr inbounds i16, ptr [[IN]], i32 -5
; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX25]], align 2
; CHECK-NEXT:    [[ARRAYIDX21:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 4
; CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX21]], align 2
; CHECK-NEXT:    [[TMP13:%.*]] = call i32 @llvm.arm.smladx(i32 [[TMP10]], i32 [[TMP12]], i32 [[TMP8]])
; CHECK-NEXT:    ret i32 [[TMP13]]
;
entry:
  %0 = load i16, ptr %in, align 2
  %conv = sext i16 %0 to i32
  %1 = load i16, ptr %b, align 2
  %conv2 = sext i16 %1 to i32
  %call = tail call i32 @bar(i32 %conv, i32 %conv2)
  %arrayidx3 = getelementptr inbounds i16, ptr %in, i32 -1
  %2 = load i16, ptr %arrayidx3, align 2
  %conv4 = sext i16 %2 to i32
  %arrayidx5 = getelementptr inbounds i16, ptr %b, i32 1
  %3 = load i16, ptr %arrayidx5, align 2
  %conv6 = sext i16 %3 to i32
  %mul = mul nsw i32 %conv6, %conv4
  %add = add i32 %mul, %call
  %arrayidx7 = getelementptr inbounds i16, ptr %in, i32 -2
  %4 = load i16, ptr %arrayidx7, align 2
  %conv8 = sext i16 %4 to i32
  %arrayidx9 = getelementptr inbounds i16, ptr %b, i32 2
  %5 = load i16, ptr %arrayidx9, align 2
  %conv10 = sext i16 %5 to i32
  %mul11 = mul nsw i32 %conv10, %conv8
  %add12 = add i32 %add, %mul11
  %arrayidx13 = getelementptr inbounds i16, ptr %in, i32 -3
  %6 = load i16, ptr %arrayidx13, align 2
  %conv14 = sext i16 %6 to i32
  %arrayidx15 = getelementptr inbounds i16, ptr %b, i32 3
  %7 = load i16, ptr %arrayidx15, align 2
  %conv16 = sext i16 %7 to i32
  %mul17 = mul nsw i32 %conv16, %conv14
  %add18 = add i32 %add12, %mul17
  %arrayidx19 = getelementptr inbounds i16, ptr %in, i32 -4
  %8 = load i16, ptr %arrayidx19, align 2
  %conv20 = sext i16 %8 to i32
  %arrayidx21 = getelementptr inbounds i16, ptr %b, i32 4
  %9 = load i16, ptr %arrayidx21, align 2
  %conv22 = sext i16 %9 to i32
  %mul23 = mul nsw i32 %conv22, %conv20
  %add24 = add i32 %add18, %mul23
  %arrayidx25 = getelementptr inbounds i16, ptr %in, i32 -5
  %10 = load i16, ptr %arrayidx25, align 2
  %conv26 = sext i16 %10 to i32
  %arrayidx27 = getelementptr inbounds i16, ptr %b, i32 5
  %11 = load i16, ptr %arrayidx27, align 2
  %conv28 = sext i16 %11 to i32
  %mul29 = mul nsw i32 %conv28, %conv26
  %add30 = add i32 %add24, %mul29
  ret i32 %add30
}

; Same chain of five products without a call: the first add combines two
; multiplies directly. The expected output keeps in[-1] * b[1] as a scalar
; multiply and passes it as the accumulator operand of the first
; @llvm.arm.smladx call.
define i32 @with_no_acc_input(ptr nocapture readonly %in, ptr nocapture readonly %b) {
; CHECK-LABEL: @with_no_acc_input(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[IN:%.*]], i32 -1
; CHECK-NEXT:    [[LD_2:%.*]] = load i16, ptr [[ARRAYIDX3]], align 2
; CHECK-NEXT:    [[CONV4:%.*]] = sext i16 [[LD_2]] to i32
; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[B:%.*]], i32 1
; CHECK-NEXT:    [[LD_3:%.*]] = load i16, ptr [[ARRAYIDX5]], align 2
; CHECK-NEXT:    [[CONV6:%.*]] = sext i16 [[LD_3]] to i32
; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[CONV6]], [[CONV4]]
; CHECK-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds i16, ptr [[IN]], i32 -3
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX13]], align 2
; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 2
; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX9]], align 2
; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.arm.smladx(i32 [[TMP1]], i32 [[TMP3]], i32 [[MUL]])
; CHECK-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr inbounds i16, ptr [[IN]], i32 -5
; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX25]], align 2
; CHECK-NEXT:    [[ARRAYIDX21:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 4
; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX21]], align 2
; CHECK-NEXT:    [[TMP9:%.*]] = call i32 @llvm.arm.smladx(i32 [[TMP6]], i32 [[TMP8]], i32 [[TMP4]])
; CHECK-NEXT:    ret i32 [[TMP9]]
;
entry:
  %arrayidx3 = getelementptr inbounds i16, ptr %in, i32 -1
  %ld.2 = load i16, ptr %arrayidx3, align 2
  %conv4 = sext i16 %ld.2 to i32
  %arrayidx5 = getelementptr inbounds i16, ptr %b, i32 1
  %ld.3 = load i16, ptr %arrayidx5, align 2
  %conv6 = sext i16 %ld.3 to i32
  %mul = mul nsw i32 %conv6, %conv4
  %arrayidx7 = getelementptr inbounds i16, ptr %in, i32 -2
  %ld.4 = load i16, ptr %arrayidx7, align 2
  %conv8 = sext i16 %ld.4 to i32
  %arrayidx9 = getelementptr inbounds i16, ptr %b, i32 2
  %ld.5 = load i16, ptr %arrayidx9, align 2
  %conv10 = sext i16 %ld.5 to i32
  %mul11 = mul nsw i32 %conv10, %conv8
  %add12 = add i32 %mul, %mul11
  %arrayidx13 = getelementptr inbounds i16, ptr %in, i32 -3
  %ld.6 = load i16, ptr %arrayidx13, align 2
  %conv14 = sext i16 %ld.6 to i32
  %arrayidx15 = getelementptr inbounds i16, ptr %b, i32 3
  %ld.7 = load i16, ptr %arrayidx15, align 2
  %conv16 = sext i16 %ld.7 to i32
  %mul17 = mul nsw i32 %conv16, %conv14
  %add18 = add i32 %add12, %mul17
  %arrayidx19 = getelementptr inbounds i16, ptr %in, i32 -4
  %ld.8 = load i16, ptr %arrayidx19, align 2
  %conv20 = sext i16 %ld.8 to i32
  %arrayidx21 = getelementptr inbounds i16, ptr %b, i32 4
  %ld.9 = load i16, ptr %arrayidx21, align 2
  %conv22 = sext i16 %ld.9 to i32
  %mul23 = mul nsw i32 %conv22, %conv20
  %add24 = add i32 %add18, %mul23
  %arrayidx25 = getelementptr inbounds i16, ptr %in, i32 -5
  %ld.10 = load i16, ptr %arrayidx25, align 2
  %conv26 = sext i16 %ld.10 to i32
  %arrayidx27 = getelementptr inbounds i16, ptr %b, i32 5
  %ld.11 = load i16, ptr %arrayidx27, align 2
  %conv28 = sext i16 %ld.11 to i32
  %mul29 = mul nsw i32 %conv28, %conv26
  %add30 = add i32 %add24, %mul29
  ret i32 %add30
}

; i64 counterpart of @first_mul_invalid: the call result and every i32 product
; are sign-extended to i64 before being added. The expected output keeps the
; first product and its add as scalar code and uses two chained
; @llvm.arm.smlaldx calls for the remaining four products.
define i64 @with_64bit_acc(ptr nocapture readonly %in, ptr nocapture readonly %b) {
; CHECK-LABEL: @with_64bit_acc(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr [[IN:%.*]], align 2
; CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[TMP0]] to i32
; CHECK-NEXT:    [[TMP1:%.*]] = load i16, ptr [[B:%.*]], align 2
; CHECK-NEXT:    [[CONV2:%.*]] = sext i16 [[TMP1]] to i32
; CHECK-NEXT:    [[CALL:%.*]] = tail call i32 @bar(i32 [[CONV]], i32 [[CONV2]])
; CHECK-NEXT:    [[SEXT_0:%.*]] = sext i32 [[CALL]] to i64
; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[IN]], i32 -1
; CHECK-NEXT:    [[TMP2:%.*]] = load i16, ptr [[ARRAYIDX3]], align 2
; CHECK-NEXT:    [[CONV4:%.*]] = sext i16 [[TMP2]] to i32
; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 1
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX5]], align 2
; CHECK-NEXT:    [[CONV6:%.*]] = sext i16 [[TMP3]] to i32
; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[CONV6]], [[CONV4]]
; CHECK-NEXT:    [[SEXT_1:%.*]] = sext i32 [[MUL]] to i64
; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[SEXT_0]], [[SEXT_1]]
; CHECK-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds i16, ptr [[IN]], i32 -3
; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX13]], align 2
; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 2
; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX9]], align 2
; CHECK-NEXT:    [[TMP8:%.*]] = call i64 @llvm.arm.smlaldx(i32 [[TMP5]], i32 [[TMP7]], i64 [[ADD]])
; CHECK-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr inbounds i16, ptr [[IN]], i32 -5
; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX25]], align 2
; CHECK-NEXT:    [[ARRAYIDX21:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 4
; CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX21]], align 2
; CHECK-NEXT:    [[TMP13:%.*]] = call i64 @llvm.arm.smlaldx(i32 [[TMP10]], i32 [[TMP12]], i64 [[TMP8]])
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = load i16, ptr %in, align 2
  %conv = sext i16 %0 to i32
  %1 = load i16, ptr %b, align 2
  %conv2 = sext i16 %1 to i32
  %call = tail call i32 @bar(i32 %conv, i32 %conv2)
  %sext.0 = sext i32 %call to i64
  %arrayidx3 = getelementptr inbounds i16, ptr %in, i32 -1
  %2 = load i16, ptr %arrayidx3, align 2
  %conv4 = sext i16 %2 to i32
  %arrayidx5 = getelementptr inbounds i16, ptr %b, i32 1
  %3 = load i16, ptr %arrayidx5, align 2
  %conv6 = sext i16 %3 to i32
  %mul = mul nsw i32 %conv6, %conv4
  %sext.1 = sext i32 %mul to i64
  %add = add i64 %sext.0, %sext.1
  %arrayidx7 = getelementptr inbounds i16, ptr %in, i32 -2
  %4 = load i16, ptr %arrayidx7, align 2
  %conv8 = sext i16 %4 to i32
  %arrayidx9 = getelementptr inbounds i16, ptr %b, i32 2
  %5 = load i16, ptr %arrayidx9, align 2
  %conv10 = sext i16 %5 to i32
  %mul11 = mul nsw i32 %conv10, %conv8
  %sext.2 = sext i32 %mul11 to i64
  %add12 = add i64 %add, %sext.2
  %arrayidx13 = getelementptr inbounds i16, ptr %in, i32 -3
  %6 = load i16, ptr %arrayidx13, align 2
  %conv14 = sext i16 %6 to i32
  %arrayidx15 = getelementptr inbounds i16, ptr %b, i32 3
  %7 = load i16, ptr %arrayidx15, align 2
  %conv16 = sext i16 %7 to i32
  %mul17 = mul nsw i32 %conv16, %conv14
  %sext.3 = sext i32 %mul17 to i64
  %add18 = add i64 %add12, %sext.3
  %arrayidx19 = getelementptr inbounds i16, ptr %in, i32 -4
  %8 = load i16, ptr %arrayidx19, align 2
  %conv20 = sext i16 %8 to i32
  %arrayidx21 = getelementptr inbounds i16, ptr %b, i32 4
  %9 = load i16, ptr %arrayidx21, align 2
  %conv22 = sext i16 %9 to i32
  %mul23 = mul nsw i32 %conv22, %conv20
  %sext.4 = sext i32 %mul23 to i64
  %add24 = add i64 %add18, %sext.4
  %arrayidx25 = getelementptr inbounds i16, ptr %in, i32 -5
  %10 = load i16, ptr %arrayidx25, align 2
  %conv26 = sext i16 %10 to i32
  %arrayidx27 = getelementptr inbounds i16, ptr %b, i32 5
  %11 = load i16, ptr %arrayidx27, align 2
  %conv28 = sext i16 %11 to i32
  %mul29 = mul nsw i32 %conv28, %conv26
  %sext.5 = sext i32 %mul29 to i64
  %add30 = add i64 %add24, %sext.5
  ret i64 %add30
}

; i64 sum seeded by the sign-extended %acc argument, with all loads and adds in
; a second basic block. The first pointer argument is read at element offsets
; 0..3 and the second at element offsets 0..-3. The expected output keeps the
; first two products as scalar code and combines the last two into a single
; @llvm.arm.smlaldx call.
define i64 @with_64bit_add_acc(ptr nocapture readonly %px.10756.unr, ptr nocapture readonly %py.8757.unr, i32 %acc) {
; CHECK-LABEL: @with_64bit_add_acc(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SUM_3758_UNR:%.*]] = sext i32 [[ACC:%.*]] to i64
; CHECK-NEXT:    br label [[BB_1:%.*]]
; CHECK:       bb.1:
; CHECK-NEXT:    [[INCDEC_PTR184_EPIL:%.*]] = getelementptr inbounds i16, ptr [[PX_10756_UNR:%.*]], i32 1
; CHECK-NEXT:    [[TMP216:%.*]] = load i16, ptr [[PX_10756_UNR]], align 2
; CHECK-NEXT:    [[CONV185_EPIL:%.*]] = sext i16 [[TMP216]] to i32
; CHECK-NEXT:    [[INCDEC_PTR186_EPIL:%.*]] = getelementptr inbounds i16, ptr [[PY_8757_UNR:%.*]], i32 -1
; CHECK-NEXT:    [[TMP217:%.*]] = load i16, ptr [[PY_8757_UNR]], align 2
; CHECK-NEXT:    [[CONV187_EPIL:%.*]] = sext i16 [[TMP217]] to i32
; CHECK-NEXT:    [[MUL_EPIL:%.*]] = mul nsw i32 [[CONV187_EPIL]], [[CONV185_EPIL]]
; CHECK-NEXT:    [[CONV188_EPIL:%.*]] = sext i32 [[MUL_EPIL]] to i64
; CHECK-NEXT:    [[ADD189_EPIL:%.*]] = add nsw i64 [[SUM_3758_UNR]], [[CONV188_EPIL]]
; CHECK-NEXT:    [[INCDEC_PTR190_EPIL:%.*]] = getelementptr inbounds i16, ptr [[PX_10756_UNR]], i32 2
; CHECK-NEXT:    [[TMP218:%.*]] = load i16, ptr [[INCDEC_PTR184_EPIL]], align 2
; CHECK-NEXT:    [[CONV191_EPIL:%.*]] = sext i16 [[TMP218]] to i32
; CHECK-NEXT:    [[TMP219:%.*]] = load i16, ptr [[INCDEC_PTR186_EPIL]], align 2
; CHECK-NEXT:    [[CONV193_EPIL:%.*]] = sext i16 [[TMP219]] to i32
; CHECK-NEXT:    [[MUL194_EPIL:%.*]] = mul nsw i32 [[CONV193_EPIL]], [[CONV191_EPIL]]
; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[MUL194_EPIL]] to i64
; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[TMP0]], [[ADD189_EPIL]]
; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[INCDEC_PTR190_EPIL]], align 2
; CHECK-NEXT:    [[INCDEC_PTR199_EPIL:%.*]] = getelementptr inbounds i16, ptr [[PY_8757_UNR]], i32 -3
; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[INCDEC_PTR199_EPIL]], align 2
; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.arm.smlaldx(i32 [[TMP5]], i32 [[TMP3]], i64 [[TMP1]])
; CHECK-NEXT:    ret i64 [[TMP6]]
;
entry:
  %sum.3758.unr = sext i32 %acc to i64
  br label %bb.1

bb.1:
  %incdec.ptr184.epil = getelementptr inbounds i16, ptr %px.10756.unr, i32 1
  %tmp216 = load i16, ptr %px.10756.unr, align 2
  %conv185.epil = sext i16 %tmp216 to i32
  %incdec.ptr186.epil = getelementptr inbounds i16, ptr %py.8757.unr, i32 -1
  %tmp217 = load i16, ptr %py.8757.unr, align 2
  %conv187.epil = sext i16 %tmp217 to i32
  %mul.epil = mul nsw i32 %conv187.epil, %conv185.epil
  %conv188.epil = sext i32 %mul.epil to i64
  %add189.epil = add nsw i64 %sum.3758.unr, %conv188.epil
  %incdec.ptr190.epil = getelementptr inbounds i16, ptr %px.10756.unr, i32 2
  %tmp218 = load i16, ptr %incdec.ptr184.epil, align 2
  %conv191.epil = sext i16 %tmp218 to i32
  %incdec.ptr192.epil = getelementptr inbounds i16, ptr %py.8757.unr, i32 -2
  %tmp219 = load i16, ptr %incdec.ptr186.epil, align 2
  %conv193.epil = sext i16 %tmp219 to i32
  %mul194.epil = mul nsw i32 %conv193.epil, %conv191.epil
  %conv195.epil = sext i32 %mul194.epil to i64
  %add196.epil = add nsw i64 %add189.epil, %conv195.epil
  %incdec.ptr197.epil = getelementptr inbounds i16, ptr %px.10756.unr, i32 3
  %tmp220 = load i16, ptr %incdec.ptr190.epil, align 2
  %conv198.epil = sext i16 %tmp220 to i32
  %incdec.ptr199.epil = getelementptr inbounds i16, ptr %py.8757.unr, i32 -3
  %tmp221 = load i16, ptr %incdec.ptr192.epil, align 2
  %conv200.epil = sext i16 %tmp221 to i32
  %mul201.epil = mul nsw i32 %conv200.epil, %conv198.epil
  %conv202.epil = sext i32 %mul201.epil to i64
  %add203.epil = add nsw i64 %add196.epil, %conv202.epil
  %tmp222 = load i16, ptr %incdec.ptr197.epil, align 2
  %conv205.epil = sext i16 %tmp222 to i32
  %tmp223 = load i16, ptr %incdec.ptr199.epil, align 2
  %conv207.epil = sext i16 %tmp223 to i32
  %mul208.epil = mul nsw i32 %conv207.epil, %conv205.epil
  %conv209.epil = sext i32 %mul208.epil to i64
  %add210.epil = add nsw i64 %add203.epil, %conv209.epil
  ret i64 %add210.epil
}

; External function called by @first_mul_invalid and @with_64bit_acc.
declare dso_local i32 @bar(i32, i32) local_unnamed_addr
