; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -arm-parallel-dsp -mtriple=armv7-a -S %s -o - | FileCheck %s

; Each function below loads adjacent i16 elements from %a and %b, sign-extends
; them to i32 and sums pairs of products. In the "exchange" pattern element 0 of
; one array is multiplied by element 1 of the other (a[0]*b[1] + a[1]*b[0]).
; The CHECK lines record the IR produced by the RUN line above: the two i16
; loads of a pair are accompanied by one i32 load and the multiply-add chain is
; replaced by a call to an @llvm.arm.smlad* intrinsic whose result is returned.
; The original scalar instructions are still listed because this RUN line does
; not run any cleanup after the pass.

; acc + a[0]*b[1] + a[1]*b[0].
; Expected: one smladx(a, b, acc) call on the i32 loads of %a and %b.
define i32 @exchange_1(ptr %a, ptr %b, i32 %acc) {
; CHECK-LABEL: @exchange_1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A:%.*]], i32 1
; CHECK-NEXT:    [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B:%.*]], i32 1
; CHECK-NEXT:    [[LD_A_0:%.*]] = load i16, ptr [[A]], align 2
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A]], align 2
; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; CHECK-NEXT:    [[TMP3:%.*]] = sext i16 [[TMP2]] to i32
; CHECK-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP1]], 16
; CHECK-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
; CHECK-NEXT:    [[TMP6:%.*]] = sext i16 [[TMP5]] to i32
; CHECK-NEXT:    [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32
; CHECK-NEXT:    [[LD_B_0:%.*]] = load i16, ptr [[B]], align 2
; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[B]], align 2
; CHECK-NEXT:    [[TMP9:%.*]] = trunc i32 [[TMP8]] to i16
; CHECK-NEXT:    [[TMP10:%.*]] = call i32 @llvm.arm.smladx(i32 [[TMP1]], i32 [[TMP8]], i32 [[ACC:%.*]])
; CHECK-NEXT:    [[TMP11:%.*]] = sext i16 [[TMP9]] to i32
; CHECK-NEXT:    [[TMP12:%.*]] = lshr i32 [[TMP8]], 16
; CHECK-NEXT:    [[TMP13:%.*]] = trunc i32 [[TMP12]] to i16
; CHECK-NEXT:    [[TMP14:%.*]] = sext i16 [[TMP13]] to i32
; CHECK-NEXT:    [[LD_A_1:%.*]] = load i16, ptr [[ADDR_A_1]], align 2
; CHECK-NEXT:    [[LD_B_1:%.*]] = load i16, ptr [[ADDR_B_1]], align 2
; CHECK-NEXT:    [[SEXT_A_1:%.*]] = sext i16 [[LD_A_1]] to i32
; CHECK-NEXT:    [[SEXT_B_1:%.*]] = sext i16 [[LD_B_1]] to i32
; CHECK-NEXT:    [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32
; CHECK-NEXT:    [[MUL_0:%.*]] = mul i32 [[TMP3]], [[TMP14]]
; CHECK-NEXT:    [[MUL_1:%.*]] = mul i32 [[TMP6]], [[TMP11]]
; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[MUL_0]], [[MUL_1]]
; CHECK-NEXT:    [[RES:%.*]] = add i32 [[ADD]], [[ACC]]
; CHECK-NEXT:    ret i32 [[TMP10]]
;
entry:
  %addr.a.1 = getelementptr i16, ptr %a, i32 1
  %addr.b.1 = getelementptr i16, ptr %b, i32 1
  %ld.a.0 = load i16, ptr %a
  %sext.a.0 = sext i16 %ld.a.0 to i32
  %ld.b.0 = load i16, ptr %b
  %ld.a.1 = load i16, ptr %addr.a.1
  %ld.b.1 = load i16, ptr %addr.b.1
  %sext.a.1 = sext i16 %ld.a.1 to i32
  %sext.b.1 = sext i16 %ld.b.1 to i32
  %sext.b.0 = sext i16 %ld.b.0 to i32
  %mul.0 = mul i32 %sext.a.0, %sext.b.1
  %mul.1 = mul i32 %sext.a.1, %sext.b.0
  %add = add i32 %mul.0, %mul.1
  %res = add i32 %add, %acc
  ret i32 %res
}

; Same computation as exchange_1, but each multiply has its operands commuted
; (the %b element is the first operand).
; Expected: the same smladx(a, b, acc) call.
define i32 @exchange_2(ptr %a, ptr %b, i32 %acc) {
; CHECK-LABEL: @exchange_2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A:%.*]], i32 1
; CHECK-NEXT:    [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B:%.*]], i32 1
; CHECK-NEXT:    [[LD_A_0:%.*]] = load i16, ptr [[A]], align 2
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A]], align 2
; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; CHECK-NEXT:    [[TMP3:%.*]] = sext i16 [[TMP2]] to i32
; CHECK-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP1]], 16
; CHECK-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
; CHECK-NEXT:    [[TMP6:%.*]] = sext i16 [[TMP5]] to i32
; CHECK-NEXT:    [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32
; CHECK-NEXT:    [[LD_B_0:%.*]] = load i16, ptr [[B]], align 2
; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[B]], align 2
; CHECK-NEXT:    [[TMP9:%.*]] = trunc i32 [[TMP8]] to i16
; CHECK-NEXT:    [[TMP10:%.*]] = call i32 @llvm.arm.smladx(i32 [[TMP1]], i32 [[TMP8]], i32 [[ACC:%.*]])
; CHECK-NEXT:    [[TMP11:%.*]] = sext i16 [[TMP9]] to i32
; CHECK-NEXT:    [[TMP12:%.*]] = lshr i32 [[TMP8]], 16
; CHECK-NEXT:    [[TMP13:%.*]] = trunc i32 [[TMP12]] to i16
; CHECK-NEXT:    [[TMP14:%.*]] = sext i16 [[TMP13]] to i32
; CHECK-NEXT:    [[LD_A_1:%.*]] = load i16, ptr [[ADDR_A_1]], align 2
; CHECK-NEXT:    [[LD_B_1:%.*]] = load i16, ptr [[ADDR_B_1]], align 2
; CHECK-NEXT:    [[SEXT_A_1:%.*]] = sext i16 [[LD_A_1]] to i32
; CHECK-NEXT:    [[SEXT_B_1:%.*]] = sext i16 [[LD_B_1]] to i32
; CHECK-NEXT:    [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32
; CHECK-NEXT:    [[MUL_0:%.*]] = mul i32 [[TMP14]], [[TMP3]]
; CHECK-NEXT:    [[MUL_1:%.*]] = mul i32 [[TMP11]], [[TMP6]]
; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[MUL_0]], [[MUL_1]]
; CHECK-NEXT:    [[RES:%.*]] = add i32 [[ADD]], [[ACC]]
; CHECK-NEXT:    ret i32 [[TMP10]]
;
entry:
  %addr.a.1 = getelementptr i16, ptr %a, i32 1
  %addr.b.1 = getelementptr i16, ptr %b, i32 1
  %ld.a.0 = load i16, ptr %a
  %sext.a.0 = sext i16 %ld.a.0 to i32
  %ld.b.0 = load i16, ptr %b
  %ld.a.1 = load i16, ptr %addr.a.1
  %ld.b.1 = load i16, ptr %addr.b.1
  %sext.a.1 = sext i16 %ld.a.1 to i32
  %sext.b.1 = sext i16 %ld.b.1 to i32
  %sext.b.0 = sext i16 %ld.b.0 to i32
  %mul.0 = mul i32 %sext.b.1, %sext.a.0
  %mul.1 = mul i32 %sext.b.0, %sext.a.1
  %add = add i32 %mul.0, %mul.1
  %res = add i32 %add, %acc
  ret i32 %res
}

; Same multiplies as exchange_1, but the add takes them in the opposite order
; (%mul.1 + %mul.0).
; Expected: smladx with the wide loads swapped, i.e. smladx(b, a, acc).
define i32 @exchange_3(ptr %a, ptr %b, i32 %acc) {
; CHECK-LABEL: @exchange_3(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A:%.*]], i32 1
; CHECK-NEXT:    [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B:%.*]], i32 1
; CHECK-NEXT:    [[LD_A_0:%.*]] = load i16, ptr [[A]], align 2
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A]], align 2
; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; CHECK-NEXT:    [[TMP3:%.*]] = sext i16 [[TMP2]] to i32
; CHECK-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP1]], 16
; CHECK-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
; CHECK-NEXT:    [[TMP6:%.*]] = sext i16 [[TMP5]] to i32
; CHECK-NEXT:    [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32
; CHECK-NEXT:    [[LD_B_0:%.*]] = load i16, ptr [[B]], align 2
; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[B]], align 2
; CHECK-NEXT:    [[TMP9:%.*]] = trunc i32 [[TMP8]] to i16
; CHECK-NEXT:    [[TMP10:%.*]] = call i32 @llvm.arm.smladx(i32 [[TMP8]], i32 [[TMP1]], i32 [[ACC:%.*]])
; CHECK-NEXT:    [[TMP11:%.*]] = sext i16 [[TMP9]] to i32
; CHECK-NEXT:    [[TMP12:%.*]] = lshr i32 [[TMP8]], 16
; CHECK-NEXT:    [[TMP13:%.*]] = trunc i32 [[TMP12]] to i16
; CHECK-NEXT:    [[TMP14:%.*]] = sext i16 [[TMP13]] to i32
; CHECK-NEXT:    [[LD_A_1:%.*]] = load i16, ptr [[ADDR_A_1]], align 2
; CHECK-NEXT:    [[LD_B_1:%.*]] = load i16, ptr [[ADDR_B_1]], align 2
; CHECK-NEXT:    [[SEXT_A_1:%.*]] = sext i16 [[LD_A_1]] to i32
; CHECK-NEXT:    [[SEXT_B_1:%.*]] = sext i16 [[LD_B_1]] to i32
; CHECK-NEXT:    [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32
; CHECK-NEXT:    [[MUL_0:%.*]] = mul i32 [[TMP3]], [[TMP14]]
; CHECK-NEXT:    [[MUL_1:%.*]] = mul i32 [[TMP6]], [[TMP11]]
; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[MUL_1]], [[MUL_0]]
; CHECK-NEXT:    [[RES:%.*]] = add i32 [[ADD]], [[ACC]]
; CHECK-NEXT:    ret i32 [[TMP10]]
;
entry:
  %addr.a.1 = getelementptr i16, ptr %a, i32 1
  %addr.b.1 = getelementptr i16, ptr %b, i32 1
  %ld.a.0 = load i16, ptr %a
  %sext.a.0 = sext i16 %ld.a.0 to i32
  %ld.b.0 = load i16, ptr %b
  %ld.a.1 = load i16, ptr %addr.a.1
  %ld.b.1 = load i16, ptr %addr.b.1
  %sext.a.1 = sext i16 %ld.a.1 to i32
  %sext.b.1 = sext i16 %ld.b.1 to i32
  %sext.b.0 = sext i16 %ld.b.0 to i32
  %mul.0 = mul i32 %sext.a.0, %sext.b.1
  %mul.1 = mul i32 %sext.a.1, %sext.b.0
  %add = add i32 %mul.1, %mul.0
  %res = add i32 %add, %acc
  ret i32 %res
}

; Combines the variations of exchange_2 and exchange_3: multiply operands are
; commuted and the add takes %mul.1 first.
; Expected: smladx(b, a, acc).
define i32 @exchange_4(ptr %a, ptr %b, i32 %acc) {
; CHECK-LABEL: @exchange_4(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A:%.*]], i32 1
; CHECK-NEXT:    [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B:%.*]], i32 1
; CHECK-NEXT:    [[LD_A_0:%.*]] = load i16, ptr [[A]], align 2
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A]], align 2
; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; CHECK-NEXT:    [[TMP3:%.*]] = sext i16 [[TMP2]] to i32
; CHECK-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP1]], 16
; CHECK-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
; CHECK-NEXT:    [[TMP6:%.*]] = sext i16 [[TMP5]] to i32
; CHECK-NEXT:    [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32
; CHECK-NEXT:    [[LD_B_0:%.*]] = load i16, ptr [[B]], align 2
; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[B]], align 2
; CHECK-NEXT:    [[TMP9:%.*]] = trunc i32 [[TMP8]] to i16
; CHECK-NEXT:    [[TMP10:%.*]] = call i32 @llvm.arm.smladx(i32 [[TMP8]], i32 [[TMP1]], i32 [[ACC:%.*]])
; CHECK-NEXT:    [[TMP11:%.*]] = sext i16 [[TMP9]] to i32
; CHECK-NEXT:    [[TMP12:%.*]] = lshr i32 [[TMP8]], 16
; CHECK-NEXT:    [[TMP13:%.*]] = trunc i32 [[TMP12]] to i16
; CHECK-NEXT:    [[TMP14:%.*]] = sext i16 [[TMP13]] to i32
; CHECK-NEXT:    [[LD_A_1:%.*]] = load i16, ptr [[ADDR_A_1]], align 2
; CHECK-NEXT:    [[LD_B_1:%.*]] = load i16, ptr [[ADDR_B_1]], align 2
; CHECK-NEXT:    [[SEXT_A_1:%.*]] = sext i16 [[LD_A_1]] to i32
; CHECK-NEXT:    [[SEXT_B_1:%.*]] = sext i16 [[LD_B_1]] to i32
; CHECK-NEXT:    [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32
; CHECK-NEXT:    [[MUL_0:%.*]] = mul i32 [[TMP14]], [[TMP3]]
; CHECK-NEXT:    [[MUL_1:%.*]] = mul i32 [[TMP11]], [[TMP6]]
; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[MUL_1]], [[MUL_0]]
; CHECK-NEXT:    [[RES:%.*]] = add i32 [[ADD]], [[ACC]]
; CHECK-NEXT:    ret i32 [[TMP10]]
;
entry:
  %addr.a.1 = getelementptr i16, ptr %a, i32 1
  %addr.b.1 = getelementptr i16, ptr %b, i32 1
  %ld.a.0 = load i16, ptr %a
  %sext.a.0 = sext i16 %ld.a.0 to i32
  %ld.b.0 = load i16, ptr %b
  %ld.a.1 = load i16, ptr %addr.a.1
  %ld.b.1 = load i16, ptr %addr.b.1
  %sext.a.1 = sext i16 %ld.a.1 to i32
  %sext.b.1 = sext i16 %ld.b.1 to i32
  %sext.b.0 = sext i16 %ld.b.0 to i32
  %mul.0 = mul i32 %sext.b.1, %sext.a.0
  %mul.1 = mul i32 %sext.b.0, %sext.a.1
  %add = add i32 %mul.1, %mul.0
  %res = add i32 %add, %acc
  ret i32 %res
}

; b[0] and b[1] are used by two product pairs:
;   a[0]*b[1] + a[1]*b[0]   (exchanged)
;   a[3]*b[1] + a[2]*b[0]   (not exchanged)
; Expected: smladx(a[0..1], b, acc) chained into smlad(a[2..3], b, <smladx>),
; both reusing the single i32 load of %b.
define i32 @exchange_multi_use_1(ptr %a, ptr %b, i32 %acc) {
; CHECK-LABEL: @exchange_multi_use_1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A:%.*]], i32 1
; CHECK-NEXT:    [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B:%.*]], i32 1
; CHECK-NEXT:    [[LD_A_0:%.*]] = load i16, ptr [[A]], align 2
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A]], align 2
; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; CHECK-NEXT:    [[TMP3:%.*]] = sext i16 [[TMP2]] to i32
; CHECK-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP1]], 16
; CHECK-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
; CHECK-NEXT:    [[TMP6:%.*]] = sext i16 [[TMP5]] to i32
; CHECK-NEXT:    [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32
; CHECK-NEXT:    [[LD_B_0:%.*]] = load i16, ptr [[B]], align 2
; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[B]], align 2
; CHECK-NEXT:    [[TMP9:%.*]] = trunc i32 [[TMP8]] to i16
; CHECK-NEXT:    [[TMP10:%.*]] = call i32 @llvm.arm.smladx(i32 [[TMP1]], i32 [[TMP8]], i32 [[ACC:%.*]])
; CHECK-NEXT:    [[TMP11:%.*]] = sext i16 [[TMP9]] to i32
; CHECK-NEXT:    [[TMP12:%.*]] = lshr i32 [[TMP8]], 16
; CHECK-NEXT:    [[TMP13:%.*]] = trunc i32 [[TMP12]] to i16
; CHECK-NEXT:    [[TMP14:%.*]] = sext i16 [[TMP13]] to i32
; CHECK-NEXT:    [[LD_A_1:%.*]] = load i16, ptr [[ADDR_A_1]], align 2
; CHECK-NEXT:    [[LD_B_1:%.*]] = load i16, ptr [[ADDR_B_1]], align 2
; CHECK-NEXT:    [[SEXT_A_1:%.*]] = sext i16 [[LD_A_1]] to i32
; CHECK-NEXT:    [[SEXT_B_1:%.*]] = sext i16 [[LD_B_1]] to i32
; CHECK-NEXT:    [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32
; CHECK-NEXT:    [[MUL_0:%.*]] = mul i32 [[TMP3]], [[TMP14]]
; CHECK-NEXT:    [[MUL_1:%.*]] = mul i32 [[TMP6]], [[TMP11]]
; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[MUL_0]], [[MUL_1]]
; CHECK-NEXT:    [[ADDR_A_2:%.*]] = getelementptr i16, ptr [[A]], i32 2
; CHECK-NEXT:    [[ADDR_A_3:%.*]] = getelementptr i16, ptr [[A]], i32 3
; CHECK-NEXT:    [[LD_A_2:%.*]] = load i16, ptr [[ADDR_A_2]], align 2
; CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr [[ADDR_A_2]], align 2
; CHECK-NEXT:    [[TMP17:%.*]] = trunc i32 [[TMP16]] to i16
; CHECK-NEXT:    [[TMP18:%.*]] = call i32 @llvm.arm.smlad(i32 [[TMP16]], i32 [[TMP8]], i32 [[TMP10]])
; CHECK-NEXT:    [[TMP19:%.*]] = sext i16 [[TMP17]] to i32
; CHECK-NEXT:    [[TMP20:%.*]] = lshr i32 [[TMP16]], 16
; CHECK-NEXT:    [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16
; CHECK-NEXT:    [[TMP22:%.*]] = sext i16 [[TMP21]] to i32
; CHECK-NEXT:    [[LD_A_3:%.*]] = load i16, ptr [[ADDR_A_3]], align 2
; CHECK-NEXT:    [[SEXT_A_2:%.*]] = sext i16 [[LD_A_2]] to i32
; CHECK-NEXT:    [[SEXT_A_3:%.*]] = sext i16 [[LD_A_3]] to i32
; CHECK-NEXT:    [[MUL_2:%.*]] = mul i32 [[TMP22]], [[TMP14]]
; CHECK-NEXT:    [[MUL_3:%.*]] = mul i32 [[TMP19]], [[TMP11]]
; CHECK-NEXT:    [[ADD_1:%.*]] = add i32 [[MUL_2]], [[MUL_3]]
; CHECK-NEXT:    [[ADD_2:%.*]] = add i32 [[ADD]], [[ADD_1]]
; CHECK-NEXT:    [[RES:%.*]] = add i32 [[ADD_2]], [[ACC]]
; CHECK-NEXT:    ret i32 [[TMP18]]
;
entry:
  %addr.a.1 = getelementptr i16, ptr %a, i32 1
  %addr.b.1 = getelementptr i16, ptr %b, i32 1
  %ld.a.0 = load i16, ptr %a
  %sext.a.0 = sext i16 %ld.a.0 to i32
  %ld.b.0 = load i16, ptr %b
  %ld.a.1 = load i16, ptr %addr.a.1
  %ld.b.1 = load i16, ptr %addr.b.1
  %sext.a.1 = sext i16 %ld.a.1 to i32
  %sext.b.1 = sext i16 %ld.b.1 to i32
  %sext.b.0 = sext i16 %ld.b.0 to i32
  %mul.0 = mul i32 %sext.a.0, %sext.b.1
  %mul.1 = mul i32 %sext.a.1, %sext.b.0
  %add = add i32 %mul.0, %mul.1
  %addr.a.2 = getelementptr i16, ptr %a, i32 2
  %addr.a.3 = getelementptr i16, ptr %a, i32 3
  %ld.a.2 = load i16, ptr %addr.a.2
  %ld.a.3 = load i16, ptr %addr.a.3
  %sext.a.2 = sext i16 %ld.a.2 to i32
  %sext.a.3 = sext i16 %ld.a.3 to i32
  %mul.2 = mul i32 %sext.a.3, %sext.b.1
  %mul.3 = mul i32 %sext.a.2, %sext.b.0
  %add.1 = add i32 %mul.2, %mul.3
  %add.2 = add i32 %add, %add.1
  %res = add i32 %add.2, %acc
  ret i32 %res
}

; 64-bit accumulator variant of exchange_multi_use_1: the i32 sum of both pairs
; is sign-extended to i64 once and then added to the i64 %acc.
; Expected: smlaldx followed by smlald, both returning i64.
define i64 @exchange_multi_use_64_1(ptr %a, ptr %b, i64 %acc) {
; CHECK-LABEL: @exchange_multi_use_64_1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A:%.*]], i32 1
; CHECK-NEXT:    [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B:%.*]], i32 1
; CHECK-NEXT:    [[LD_A_0:%.*]] = load i16, ptr [[A]], align 2
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A]], align 2
; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; CHECK-NEXT:    [[TMP3:%.*]] = sext i16 [[TMP2]] to i32
; CHECK-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP1]], 16
; CHECK-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
; CHECK-NEXT:    [[TMP6:%.*]] = sext i16 [[TMP5]] to i32
; CHECK-NEXT:    [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32
; CHECK-NEXT:    [[LD_B_0:%.*]] = load i16, ptr [[B]], align 2
; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[B]], align 2
; CHECK-NEXT:    [[TMP9:%.*]] = trunc i32 [[TMP8]] to i16
; CHECK-NEXT:    [[TMP10:%.*]] = call i64 @llvm.arm.smlaldx(i32 [[TMP1]], i32 [[TMP8]], i64 [[ACC:%.*]])
; CHECK-NEXT:    [[TMP11:%.*]] = sext i16 [[TMP9]] to i32
; CHECK-NEXT:    [[TMP12:%.*]] = lshr i32 [[TMP8]], 16
; CHECK-NEXT:    [[TMP13:%.*]] = trunc i32 [[TMP12]] to i16
; CHECK-NEXT:    [[TMP14:%.*]] = sext i16 [[TMP13]] to i32
; CHECK-NEXT:    [[LD_A_1:%.*]] = load i16, ptr [[ADDR_A_1]], align 2
; CHECK-NEXT:    [[LD_B_1:%.*]] = load i16, ptr [[ADDR_B_1]], align 2
; CHECK-NEXT:    [[SEXT_A_1:%.*]] = sext i16 [[LD_A_1]] to i32
; CHECK-NEXT:    [[SEXT_B_1:%.*]] = sext i16 [[LD_B_1]] to i32
; CHECK-NEXT:    [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32
; CHECK-NEXT:    [[MUL_0:%.*]] = mul i32 [[TMP3]], [[TMP14]]
; CHECK-NEXT:    [[MUL_1:%.*]] = mul i32 [[TMP6]], [[TMP11]]
; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[MUL_0]], [[MUL_1]]
; CHECK-NEXT:    [[ADDR_A_2:%.*]] = getelementptr i16, ptr [[A]], i32 2
; CHECK-NEXT:    [[ADDR_A_3:%.*]] = getelementptr i16, ptr [[A]], i32 3
; CHECK-NEXT:    [[LD_A_2:%.*]] = load i16, ptr [[ADDR_A_2]], align 2
; CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr [[ADDR_A_2]], align 2
; CHECK-NEXT:    [[TMP17:%.*]] = trunc i32 [[TMP16]] to i16
; CHECK-NEXT:    [[TMP18:%.*]] = call i64 @llvm.arm.smlald(i32 [[TMP16]], i32 [[TMP8]], i64 [[TMP10]])
; CHECK-NEXT:    [[TMP19:%.*]] = sext i16 [[TMP17]] to i32
; CHECK-NEXT:    [[TMP20:%.*]] = lshr i32 [[TMP16]], 16
; CHECK-NEXT:    [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16
; CHECK-NEXT:    [[TMP22:%.*]] = sext i16 [[TMP21]] to i32
; CHECK-NEXT:    [[LD_A_3:%.*]] = load i16, ptr [[ADDR_A_3]], align 2
; CHECK-NEXT:    [[SEXT_A_2:%.*]] = sext i16 [[LD_A_2]] to i32
; CHECK-NEXT:    [[SEXT_A_3:%.*]] = sext i16 [[LD_A_3]] to i32
; CHECK-NEXT:    [[MUL_2:%.*]] = mul i32 [[TMP22]], [[TMP14]]
; CHECK-NEXT:    [[MUL_3:%.*]] = mul i32 [[TMP19]], [[TMP11]]
; CHECK-NEXT:    [[ADD_1:%.*]] = add i32 [[MUL_2]], [[MUL_3]]
; CHECK-NEXT:    [[ADD_2:%.*]] = add i32 [[ADD]], [[ADD_1]]
; CHECK-NEXT:    [[SEXT_ADD_2:%.*]] = sext i32 [[ADD_2]] to i64
; CHECK-NEXT:    [[RES:%.*]] = add i64 [[SEXT_ADD_2]], [[ACC]]
; CHECK-NEXT:    ret i64 [[TMP18]]
;
entry:
  %addr.a.1 = getelementptr i16, ptr %a, i32 1
  %addr.b.1 = getelementptr i16, ptr %b, i32 1
  %ld.a.0 = load i16, ptr %a
  %sext.a.0 = sext i16 %ld.a.0 to i32
  %ld.b.0 = load i16, ptr %b
  %ld.a.1 = load i16, ptr %addr.a.1
  %ld.b.1 = load i16, ptr %addr.b.1
  %sext.a.1 = sext i16 %ld.a.1 to i32
  %sext.b.1 = sext i16 %ld.b.1 to i32
  %sext.b.0 = sext i16 %ld.b.0 to i32
  %mul.0 = mul i32 %sext.a.0, %sext.b.1
  %mul.1 = mul i32 %sext.a.1, %sext.b.0
  %add = add i32 %mul.0, %mul.1
  %addr.a.2 = getelementptr i16, ptr %a, i32 2
  %addr.a.3 = getelementptr i16, ptr %a, i32 3
  %ld.a.2 = load i16, ptr %addr.a.2
  %ld.a.3 = load i16, ptr %addr.a.3
  %sext.a.2 = sext i16 %ld.a.2 to i32
  %sext.a.3 = sext i16 %ld.a.3 to i32
  %mul.2 = mul i32 %sext.a.3, %sext.b.1
  %mul.3 = mul i32 %sext.a.2, %sext.b.0
  %add.1 = add i32 %mul.2, %mul.3
  %add.2 = add i32 %add, %add.1
  %sext.add.2 = sext i32 %add.2 to i64
  %res = add i64 %sext.add.2, %acc
  ret i64 %res
}

; Like exchange_multi_use_64_1, but each pair's i32 sum is sign-extended to i64
; separately and the two i64 values are added before accumulating into %acc.
; Expected: the same smlaldx + smlald chain.
define i64 @exchange_multi_use_64_2(ptr %a, ptr %b, i64 %acc) {
; CHECK-LABEL: @exchange_multi_use_64_2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A:%.*]], i32 1
; CHECK-NEXT:    [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B:%.*]], i32 1
; CHECK-NEXT:    [[LD_A_0:%.*]] = load i16, ptr [[A]], align 2
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A]], align 2
; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; CHECK-NEXT:    [[TMP3:%.*]] = sext i16 [[TMP2]] to i32
; CHECK-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP1]], 16
; CHECK-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
; CHECK-NEXT:    [[TMP6:%.*]] = sext i16 [[TMP5]] to i32
; CHECK-NEXT:    [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32
; CHECK-NEXT:    [[LD_B_0:%.*]] = load i16, ptr [[B]], align 2
; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[B]], align 2
; CHECK-NEXT:    [[TMP9:%.*]] = trunc i32 [[TMP8]] to i16
; CHECK-NEXT:    [[TMP10:%.*]] = call i64 @llvm.arm.smlaldx(i32 [[TMP1]], i32 [[TMP8]], i64 [[ACC:%.*]])
; CHECK-NEXT:    [[TMP11:%.*]] = sext i16 [[TMP9]] to i32
; CHECK-NEXT:    [[TMP12:%.*]] = lshr i32 [[TMP8]], 16
; CHECK-NEXT:    [[TMP13:%.*]] = trunc i32 [[TMP12]] to i16
; CHECK-NEXT:    [[TMP14:%.*]] = sext i16 [[TMP13]] to i32
; CHECK-NEXT:    [[LD_A_1:%.*]] = load i16, ptr [[ADDR_A_1]], align 2
; CHECK-NEXT:    [[LD_B_1:%.*]] = load i16, ptr [[ADDR_B_1]], align 2
; CHECK-NEXT:    [[SEXT_A_1:%.*]] = sext i16 [[LD_A_1]] to i32
; CHECK-NEXT:    [[SEXT_B_1:%.*]] = sext i16 [[LD_B_1]] to i32
; CHECK-NEXT:    [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32
; CHECK-NEXT:    [[MUL_0:%.*]] = mul i32 [[TMP3]], [[TMP14]]
; CHECK-NEXT:    [[MUL_1:%.*]] = mul i32 [[TMP6]], [[TMP11]]
; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[MUL_0]], [[MUL_1]]
; CHECK-NEXT:    [[SEXT_ADD:%.*]] = sext i32 [[ADD]] to i64
; CHECK-NEXT:    [[ADDR_A_2:%.*]] = getelementptr i16, ptr [[A]], i32 2
; CHECK-NEXT:    [[ADDR_A_3:%.*]] = getelementptr i16, ptr [[A]], i32 3
; CHECK-NEXT:    [[LD_A_2:%.*]] = load i16, ptr [[ADDR_A_2]], align 2
; CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr [[ADDR_A_2]], align 2
; CHECK-NEXT:    [[TMP17:%.*]] = trunc i32 [[TMP16]] to i16
; CHECK-NEXT:    [[TMP18:%.*]] = call i64 @llvm.arm.smlald(i32 [[TMP16]], i32 [[TMP8]], i64 [[TMP10]])
; CHECK-NEXT:    [[TMP19:%.*]] = sext i16 [[TMP17]] to i32
; CHECK-NEXT:    [[TMP20:%.*]] = lshr i32 [[TMP16]], 16
; CHECK-NEXT:    [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16
; CHECK-NEXT:    [[TMP22:%.*]] = sext i16 [[TMP21]] to i32
; CHECK-NEXT:    [[LD_A_3:%.*]] = load i16, ptr [[ADDR_A_3]], align 2
; CHECK-NEXT:    [[SEXT_A_2:%.*]] = sext i16 [[LD_A_2]] to i32
; CHECK-NEXT:    [[SEXT_A_3:%.*]] = sext i16 [[LD_A_3]] to i32
; CHECK-NEXT:    [[MUL_2:%.*]] = mul i32 [[TMP22]], [[TMP14]]
; CHECK-NEXT:    [[MUL_3:%.*]] = mul i32 [[TMP19]], [[TMP11]]
; CHECK-NEXT:    [[ADD_1:%.*]] = add i32 [[MUL_2]], [[MUL_3]]
; CHECK-NEXT:    [[SEXT_ADD_1:%.*]] = sext i32 [[ADD_1]] to i64
; CHECK-NEXT:    [[ADD_2:%.*]] = add i64 [[SEXT_ADD]], [[SEXT_ADD_1]]
; CHECK-NEXT:    [[RES:%.*]] = add i64 [[ADD_2]], [[ACC]]
; CHECK-NEXT:    ret i64 [[TMP18]]
;
entry:
  %addr.a.1 = getelementptr i16, ptr %a, i32 1
  %addr.b.1 = getelementptr i16, ptr %b, i32 1
  %ld.a.0 = load i16, ptr %a
  %sext.a.0 = sext i16 %ld.a.0 to i32
  %ld.b.0 = load i16, ptr %b
  %ld.a.1 = load i16, ptr %addr.a.1
  %ld.b.1 = load i16, ptr %addr.b.1
  %sext.a.1 = sext i16 %ld.a.1 to i32
  %sext.b.1 = sext i16 %ld.b.1 to i32
  %sext.b.0 = sext i16 %ld.b.0 to i32
  %mul.0 = mul i32 %sext.a.0, %sext.b.1
  %mul.1 = mul i32 %sext.a.1, %sext.b.0
  %add = add i32 %mul.0, %mul.1
  %sext.add = sext i32 %add to i64
  %addr.a.2 = getelementptr i16, ptr %a, i32 2
  %addr.a.3 = getelementptr i16, ptr %a, i32 3
  %ld.a.2 = load i16, ptr %addr.a.2
  %ld.a.3 = load i16, ptr %addr.a.3
  %sext.a.2 = sext i16 %ld.a.2 to i32
  %sext.a.3 = sext i16 %ld.a.3 to i32
  %mul.2 = mul i32 %sext.a.3, %sext.b.1
  %mul.3 = mul i32 %sext.a.2, %sext.b.0
  %add.1 = add i32 %mul.2, %mul.3
  %sext.add.1 = sext i32 %add.1 to i64
  %add.2 = add i64 %sext.add, %sext.add.1
  %res = add i64 %add.2, %acc
  ret i64 %res
}

; Mirror of exchange_multi_use_1: here the first pair is not exchanged
; (a[0]*b[0] + a[1]*b[1]) and the second pair is (b[0]*a[3] + b[1]*a[2]).
; Expected: smlad(a[0..1], b, acc) chained into smladx(b, a[2..3], <smlad>).
define i32 @exchange_multi_use_2(ptr %a, ptr %b, i32 %acc) {
; CHECK-LABEL: @exchange_multi_use_2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A:%.*]], i32 1
; CHECK-NEXT:    [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B:%.*]], i32 1
; CHECK-NEXT:    [[LD_A_0:%.*]] = load i16, ptr [[A]], align 2
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A]], align 2
; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; CHECK-NEXT:    [[TMP3:%.*]] = sext i16 [[TMP2]] to i32
; CHECK-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP1]], 16
; CHECK-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
; CHECK-NEXT:    [[TMP6:%.*]] = sext i16 [[TMP5]] to i32
; CHECK-NEXT:    [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32
; CHECK-NEXT:    [[LD_B_0:%.*]] = load i16, ptr [[B]], align 2
; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[B]], align 2
; CHECK-NEXT:    [[TMP9:%.*]] = trunc i32 [[TMP8]] to i16
; CHECK-NEXT:    [[TMP10:%.*]] = call i32 @llvm.arm.smlad(i32 [[TMP1]], i32 [[TMP8]], i32 [[ACC:%.*]])
; CHECK-NEXT:    [[TMP11:%.*]] = sext i16 [[TMP9]] to i32
; CHECK-NEXT:    [[TMP12:%.*]] = lshr i32 [[TMP8]], 16
; CHECK-NEXT:    [[TMP13:%.*]] = trunc i32 [[TMP12]] to i16
; CHECK-NEXT:    [[TMP14:%.*]] = sext i16 [[TMP13]] to i32
; CHECK-NEXT:    [[LD_A_1:%.*]] = load i16, ptr [[ADDR_A_1]], align 2
; CHECK-NEXT:    [[LD_B_1:%.*]] = load i16, ptr [[ADDR_B_1]], align 2
; CHECK-NEXT:    [[SEXT_A_1:%.*]] = sext i16 [[LD_A_1]] to i32
; CHECK-NEXT:    [[SEXT_B_1:%.*]] = sext i16 [[LD_B_1]] to i32
; CHECK-NEXT:    [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32
; CHECK-NEXT:    [[MUL_0:%.*]] = mul i32 [[TMP3]], [[TMP11]]
; CHECK-NEXT:    [[MUL_1:%.*]] = mul i32 [[TMP6]], [[TMP14]]
; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[MUL_0]], [[MUL_1]]
; CHECK-NEXT:    [[ADDR_A_2:%.*]] = getelementptr i16, ptr [[A]], i32 2
; CHECK-NEXT:    [[ADDR_A_3:%.*]] = getelementptr i16, ptr [[A]], i32 3
; CHECK-NEXT:    [[LD_A_2:%.*]] = load i16, ptr [[ADDR_A_2]], align 2
; CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr [[ADDR_A_2]], align 2
; CHECK-NEXT:    [[TMP17:%.*]] = trunc i32 [[TMP16]] to i16
; CHECK-NEXT:    [[TMP18:%.*]] = call i32 @llvm.arm.smladx(i32 [[TMP8]], i32 [[TMP16]], i32 [[TMP10]])
; CHECK-NEXT:    [[TMP19:%.*]] = sext i16 [[TMP17]] to i32
; CHECK-NEXT:    [[TMP20:%.*]] = lshr i32 [[TMP16]], 16
; CHECK-NEXT:    [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16
; CHECK-NEXT:    [[TMP22:%.*]] = sext i16 [[TMP21]] to i32
; CHECK-NEXT:    [[LD_A_3:%.*]] = load i16, ptr [[ADDR_A_3]], align 2
; CHECK-NEXT:    [[SEXT_A_2:%.*]] = sext i16 [[LD_A_2]] to i32
; CHECK-NEXT:    [[SEXT_A_3:%.*]] = sext i16 [[LD_A_3]] to i32
; CHECK-NEXT:    [[MUL_2:%.*]] = mul i32 [[TMP11]], [[TMP22]]
; CHECK-NEXT:    [[MUL_3:%.*]] = mul i32 [[TMP14]], [[TMP19]]
; CHECK-NEXT:    [[ADD_1:%.*]] = add i32 [[MUL_2]], [[MUL_3]]
; CHECK-NEXT:    [[ADD_2:%.*]] = add i32 [[ADD]], [[ADD_1]]
; CHECK-NEXT:    [[RES:%.*]] = add i32 [[ADD_2]], [[ACC]]
; CHECK-NEXT:    ret i32 [[TMP18]]
;
entry:
  %addr.a.1 = getelementptr i16, ptr %a, i32 1
  %addr.b.1 = getelementptr i16, ptr %b, i32 1
  %ld.a.0 = load i16, ptr %a
  %sext.a.0 = sext i16 %ld.a.0 to i32
  %ld.b.0 = load i16, ptr %b
  %ld.a.1 = load i16, ptr %addr.a.1
  %ld.b.1 = load i16, ptr %addr.b.1
  %sext.a.1 = sext i16 %ld.a.1 to i32
  %sext.b.1 = sext i16 %ld.b.1 to i32
  %sext.b.0 = sext i16 %ld.b.0 to i32
  %mul.0 = mul i32 %sext.a.0, %sext.b.0
  %mul.1 = mul i32 %sext.a.1, %sext.b.1
  %add = add i32 %mul.0, %mul.1
  %addr.a.2 = getelementptr i16, ptr %a, i32 2
  %addr.a.3 = getelementptr i16, ptr %a, i32 3
  %ld.a.2 = load i16, ptr %addr.a.2
  %ld.a.3 = load i16, ptr %addr.a.3
  %sext.a.2 = sext i16 %ld.a.2 to i32
  %sext.a.3 = sext i16 %ld.a.3 to i32
  %mul.2 = mul i32 %sext.b.0, %sext.a.3
  %mul.3 = mul i32 %sext.b.1, %sext.a.2
  %add.1 = add i32 %mul.2, %mul.3
  %add.2 = add i32 %add, %add.1
  %res = add i32 %add.2, %acc
  ret i32 %res
}

; The exchanged pair (b[0]*a[3] + b[1]*a[2]) is subtracted from the
; non-exchanged pair (a[0]*b[0] + a[1]*b[1]) and the difference is added to
; %acc. The recorded output contains a single smladx(b, a[2..3], 0) feeding the
; sub; the non-exchanged pair stays as scalar mul/add, and a[0]/a[1] get no
; wide load.
; TODO: Why aren't two intrinsics generated?
define i32 @exchange_multi_use_3(ptr %a, ptr %b, i32 %acc) {
; CHECK-LABEL: @exchange_multi_use_3(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A:%.*]], i32 1
; CHECK-NEXT:    [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B:%.*]], i32 1
; CHECK-NEXT:    [[LD_A_0:%.*]] = load i16, ptr [[A]], align 2
; CHECK-NEXT:    [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32
; CHECK-NEXT:    [[LD_B_0:%.*]] = load i16, ptr [[B]], align 2
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B]], align 2
; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; CHECK-NEXT:    [[TMP3:%.*]] = sext i16 [[TMP2]] to i32
; CHECK-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP1]], 16
; CHECK-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
; CHECK-NEXT:    [[TMP6:%.*]] = sext i16 [[TMP5]] to i32
; CHECK-NEXT:    [[LD_A_1:%.*]] = load i16, ptr [[ADDR_A_1]], align 2
; CHECK-NEXT:    [[LD_B_1:%.*]] = load i16, ptr [[ADDR_B_1]], align 2
; CHECK-NEXT:    [[SEXT_A_1:%.*]] = sext i16 [[LD_A_1]] to i32
; CHECK-NEXT:    [[SEXT_B_1:%.*]] = sext i16 [[LD_B_1]] to i32
; CHECK-NEXT:    [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32
; CHECK-NEXT:    [[ADDR_A_2:%.*]] = getelementptr i16, ptr [[A]], i32 2
; CHECK-NEXT:    [[ADDR_A_3:%.*]] = getelementptr i16, ptr [[A]], i32 3
; CHECK-NEXT:    [[LD_A_2:%.*]] = load i16, ptr [[ADDR_A_2]], align 2
; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[ADDR_A_2]], align 2
; CHECK-NEXT:    [[TMP9:%.*]] = trunc i32 [[TMP8]] to i16
; CHECK-NEXT:    [[TMP10:%.*]] = call i32 @llvm.arm.smladx(i32 [[TMP1]], i32 [[TMP8]], i32 0)
; CHECK-NEXT:    [[TMP11:%.*]] = sext i16 [[TMP9]] to i32
; CHECK-NEXT:    [[TMP12:%.*]] = lshr i32 [[TMP8]], 16
; CHECK-NEXT:    [[TMP13:%.*]] = trunc i32 [[TMP12]] to i16
; CHECK-NEXT:    [[TMP14:%.*]] = sext i16 [[TMP13]] to i32
; CHECK-NEXT:    [[LD_A_3:%.*]] = load i16, ptr [[ADDR_A_3]], align 2
; CHECK-NEXT:    [[SEXT_A_2:%.*]] = sext i16 [[LD_A_2]] to i32
; CHECK-NEXT:    [[SEXT_A_3:%.*]] = sext i16 [[LD_A_3]] to i32
; CHECK-NEXT:    [[MUL_2:%.*]] = mul i32 [[TMP3]], [[TMP14]]
; CHECK-NEXT:    [[MUL_3:%.*]] = mul i32 [[TMP6]], [[TMP11]]
; CHECK-NEXT:    [[MUL_0:%.*]] = mul i32 [[SEXT_A_0]], [[TMP3]]
; CHECK-NEXT:    [[MUL_1:%.*]] = mul i32 [[SEXT_A_1]], [[TMP6]]
; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[MUL_0]], [[MUL_1]]
; CHECK-NEXT:    [[ADD_1:%.*]] = add i32 [[MUL_2]], [[MUL_3]]
; CHECK-NEXT:    [[SUB:%.*]] = sub i32 [[ADD]], [[TMP10]]
; CHECK-NEXT:    [[RES:%.*]] = add i32 [[ACC:%.*]], [[SUB]]
; CHECK-NEXT:    ret i32 [[RES]]
;
entry:
  %addr.a.1 = getelementptr i16, ptr %a, i32 1
  %addr.b.1 = getelementptr i16, ptr %b, i32 1
  %ld.a.0 = load i16, ptr %a
  %sext.a.0 = sext i16 %ld.a.0 to i32
  %ld.b.0 = load i16, ptr %b
  %ld.a.1 = load i16, ptr %addr.a.1
  %ld.b.1 = load i16, ptr %addr.b.1
  %sext.a.1 = sext i16 %ld.a.1 to i32
  %sext.b.1 = sext i16 %ld.b.1 to i32
  %sext.b.0 = sext i16 %ld.b.0 to i32
  %addr.a.2 = getelementptr i16, ptr %a, i32 2
  %addr.a.3 = getelementptr i16, ptr %a, i32 3
  %ld.a.2 = load i16, ptr %addr.a.2
  %ld.a.3 = load i16, ptr %addr.a.3
  %sext.a.2 = sext i16 %ld.a.2 to i32
  %sext.a.3 = sext i16 %ld.a.3 to i32
  %mul.2 = mul i32 %sext.b.0, %sext.a.3
  %mul.3 = mul i32 %sext.b.1, %sext.a.2
  %mul.0 = mul i32 %sext.a.0, %sext.b.0
  %mul.1 = mul i32 %sext.a.1, %sext.b.1
  %add = add i32 %mul.0, %mul.1
  %add.1 = add i32 %mul.2, %mul.3
  %sub = sub i32 %add, %add.1
  %res = add i32 %acc, %sub
  ret i32 %res
}

; TODO: Would it be better to generate a smlad and then sign extend it?
; 64-bit accumulation of two pairs that share the %b loads:
;   a[0]*b[0] + a[1]*b[1]   (parallel pair)
;   b[0]*a[3] + b[1]*a[2]   (exchanged pair)
; Each pair is summed in i32, sign-extended to i64, the two are added, and the
; total is subtracted from %acc. The assertions expect an smlaldx on the
; widened %b and %a+2 loads (accumulator 0) chained into an smlald on the
; widened %a and %b loads, whose result is what gets subtracted from %acc.
; The original scalar multiplies/adds are still listed in the output but no
; longer feed the returned value.
; The assertions in this file are autogenerated (see the NOTE on its first
; line); regenerate them with utils/update_test_checks.py rather than editing
; them by hand.
define i64 @exchange_multi_use_64_3(ptr %a, ptr %b, i64 %acc) {
; CHECK-LABEL: @exchange_multi_use_64_3(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A:%.*]], i32 1
; CHECK-NEXT:    [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B:%.*]], i32 1
; CHECK-NEXT:    [[LD_A_0:%.*]] = load i16, ptr [[A]], align 2
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A]], align 2
; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; CHECK-NEXT:    [[TMP3:%.*]] = sext i16 [[TMP2]] to i32
; CHECK-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP1]], 16
; CHECK-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
; CHECK-NEXT:    [[TMP6:%.*]] = sext i16 [[TMP5]] to i32
; CHECK-NEXT:    [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32
; CHECK-NEXT:    [[LD_B_0:%.*]] = load i16, ptr [[B]], align 2
; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[B]], align 2
; CHECK-NEXT:    [[TMP9:%.*]] = trunc i32 [[TMP8]] to i16
; CHECK-NEXT:    [[TMP10:%.*]] = sext i16 [[TMP9]] to i32
; CHECK-NEXT:    [[TMP11:%.*]] = lshr i32 [[TMP8]], 16
; CHECK-NEXT:    [[TMP12:%.*]] = trunc i32 [[TMP11]] to i16
; CHECK-NEXT:    [[TMP13:%.*]] = sext i16 [[TMP12]] to i32
; CHECK-NEXT:    [[LD_A_1:%.*]] = load i16, ptr [[ADDR_A_1]], align 2
; CHECK-NEXT:    [[LD_B_1:%.*]] = load i16, ptr [[ADDR_B_1]], align 2
; CHECK-NEXT:    [[SEXT_A_1:%.*]] = sext i16 [[LD_A_1]] to i32
; CHECK-NEXT:    [[SEXT_B_1:%.*]] = sext i16 [[LD_B_1]] to i32
; CHECK-NEXT:    [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32
; CHECK-NEXT:    [[ADDR_A_2:%.*]] = getelementptr i16, ptr [[A]], i32 2
; CHECK-NEXT:    [[ADDR_A_3:%.*]] = getelementptr i16, ptr [[A]], i32 3
; CHECK-NEXT:    [[LD_A_2:%.*]] = load i16, ptr [[ADDR_A_2]], align 2
; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[ADDR_A_2]], align 2
; CHECK-NEXT:    [[TMP16:%.*]] = trunc i32 [[TMP15]] to i16
; CHECK-NEXT:    [[TMP17:%.*]] = call i64 @llvm.arm.smlaldx(i32 [[TMP8]], i32 [[TMP15]], i64 0)
; CHECK-NEXT:    [[TMP18:%.*]] = call i64 @llvm.arm.smlald(i32 [[TMP1]], i32 [[TMP8]], i64 [[TMP17]])
; CHECK-NEXT:    [[TMP19:%.*]] = sext i16 [[TMP16]] to i32
; CHECK-NEXT:    [[TMP20:%.*]] = lshr i32 [[TMP15]], 16
; CHECK-NEXT:    [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16
; CHECK-NEXT:    [[TMP22:%.*]] = sext i16 [[TMP21]] to i32
; CHECK-NEXT:    [[LD_A_3:%.*]] = load i16, ptr [[ADDR_A_3]], align 2
; CHECK-NEXT:    [[SEXT_A_2:%.*]] = sext i16 [[LD_A_2]] to i32
; CHECK-NEXT:    [[SEXT_A_3:%.*]] = sext i16 [[LD_A_3]] to i32
; CHECK-NEXT:    [[MUL_2:%.*]] = mul i32 [[TMP10]], [[TMP22]]
; CHECK-NEXT:    [[MUL_3:%.*]] = mul i32 [[TMP13]], [[TMP19]]
; CHECK-NEXT:    [[MUL_0:%.*]] = mul i32 [[TMP3]], [[TMP10]]
; CHECK-NEXT:    [[MUL_1:%.*]] = mul i32 [[TMP6]], [[TMP13]]
; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[MUL_0]], [[MUL_1]]
; CHECK-NEXT:    [[ADD_1:%.*]] = add i32 [[MUL_2]], [[MUL_3]]
; CHECK-NEXT:    [[SEXT_ADD:%.*]] = sext i32 [[ADD]] to i64
; CHECK-NEXT:    [[SEXT_ADD_1:%.*]] = sext i32 [[ADD_1]] to i64
; CHECK-NEXT:    [[ADD_2:%.*]] = add i64 [[SEXT_ADD]], [[SEXT_ADD_1]]
; CHECK-NEXT:    [[RES:%.*]] = sub i64 [[ACC:%.*]], [[TMP18]]
; CHECK-NEXT:    ret i64 [[RES]]
;
entry:
  %addr.a.1 = getelementptr i16, ptr %a, i32 1
  %addr.b.1 = getelementptr i16, ptr %b, i32 1
  %ld.a.0 = load i16, ptr %a
  %sext.a.0 = sext i16 %ld.a.0 to i32
  %ld.b.0 = load i16, ptr %b
  %ld.a.1 = load i16, ptr %addr.a.1
  %ld.b.1 = load i16, ptr %addr.b.1
  %sext.a.1 = sext i16 %ld.a.1 to i32
  %sext.b.1 = sext i16 %ld.b.1 to i32
  %sext.b.0 = sext i16 %ld.b.0 to i32
  %addr.a.2 = getelementptr i16, ptr %a, i32 2
  %addr.a.3 = getelementptr i16, ptr %a, i32 3
  %ld.a.2 = load i16, ptr %addr.a.2
  %ld.a.3 = load i16, ptr %addr.a.3
  %sext.a.2 = sext i16 %ld.a.2 to i32
  %sext.a.3 = sext i16 %ld.a.3 to i32
  %mul.2 = mul i32 %sext.b.0, %sext.a.3
  %mul.3 = mul i32 %sext.b.1, %sext.a.2
  %mul.0 = mul i32 %sext.a.0, %sext.b.0
  %mul.1 = mul i32 %sext.a.1, %sext.b.1
  %add = add i32 %mul.0, %mul.1
  %add.1 = add i32 %mul.2, %mul.3
  %sext.add = sext i32 %add to i64
  %sext.add.1 = sext i32 %add.1 to i64
  %add.2 = add i64 %sext.add, %sext.add.1
  %res = sub i64 %acc, %add.2
  ret i64 %res
}

; TODO: Why isn't smladx generated too?
; 32-bit case computing a[0]*b[0] + a[1]*b[1] (%add) and
; b[0]*a[3] + b[1]*a[2] (%add.1), with %add.1 defined ahead of %add, then
; returning %acc + (%add - %add.1).
; The assertions expect only a single smlad (accumulator 0) on the widened %a
; and %b loads, taking the place of %add in the subtraction. The exchanged
; pair stays as two scalar multiplies, and the a[2]/a[3] loads remain i16.
define i32 @exchange_multi_use_4(ptr %a, ptr %b, i32 %acc) {
; CHECK-LABEL: @exchange_multi_use_4(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A:%.*]], i32 1
; CHECK-NEXT:    [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B:%.*]], i32 1
; CHECK-NEXT:    [[LD_A_0:%.*]] = load i16, ptr [[A]], align 2
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A]], align 2
; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; CHECK-NEXT:    [[TMP3:%.*]] = sext i16 [[TMP2]] to i32
; CHECK-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP1]], 16
; CHECK-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
; CHECK-NEXT:    [[TMP6:%.*]] = sext i16 [[TMP5]] to i32
; CHECK-NEXT:    [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32
; CHECK-NEXT:    [[LD_B_0:%.*]] = load i16, ptr [[B]], align 2
; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[B]], align 2
; CHECK-NEXT:    [[TMP9:%.*]] = trunc i32 [[TMP8]] to i16
; CHECK-NEXT:    [[TMP10:%.*]] = call i32 @llvm.arm.smlad(i32 [[TMP1]], i32 [[TMP8]], i32 0)
; CHECK-NEXT:    [[TMP11:%.*]] = sext i16 [[TMP9]] to i32
; CHECK-NEXT:    [[TMP12:%.*]] = lshr i32 [[TMP8]], 16
; CHECK-NEXT:    [[TMP13:%.*]] = trunc i32 [[TMP12]] to i16
; CHECK-NEXT:    [[TMP14:%.*]] = sext i16 [[TMP13]] to i32
; CHECK-NEXT:    [[LD_A_1:%.*]] = load i16, ptr [[ADDR_A_1]], align 2
; CHECK-NEXT:    [[LD_B_1:%.*]] = load i16, ptr [[ADDR_B_1]], align 2
; CHECK-NEXT:    [[SEXT_A_1:%.*]] = sext i16 [[LD_A_1]] to i32
; CHECK-NEXT:    [[SEXT_B_1:%.*]] = sext i16 [[LD_B_1]] to i32
; CHECK-NEXT:    [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32
; CHECK-NEXT:    [[ADDR_A_2:%.*]] = getelementptr i16, ptr [[A]], i32 2
; CHECK-NEXT:    [[ADDR_A_3:%.*]] = getelementptr i16, ptr [[A]], i32 3
; CHECK-NEXT:    [[LD_A_2:%.*]] = load i16, ptr [[ADDR_A_2]], align 2
; CHECK-NEXT:    [[LD_A_3:%.*]] = load i16, ptr [[ADDR_A_3]], align 2
; CHECK-NEXT:    [[SEXT_A_2:%.*]] = sext i16 [[LD_A_2]] to i32
; CHECK-NEXT:    [[SEXT_A_3:%.*]] = sext i16 [[LD_A_3]] to i32
; CHECK-NEXT:    [[MUL_2:%.*]] = mul i32 [[TMP11]], [[SEXT_A_3]]
; CHECK-NEXT:    [[MUL_3:%.*]] = mul i32 [[TMP14]], [[SEXT_A_2]]
; CHECK-NEXT:    [[MUL_0:%.*]] = mul i32 [[TMP3]], [[TMP11]]
; CHECK-NEXT:    [[MUL_1:%.*]] = mul i32 [[TMP6]], [[TMP14]]
; CHECK-NEXT:    [[ADD_1:%.*]] = add i32 [[MUL_2]], [[MUL_3]]
; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[MUL_0]], [[MUL_1]]
; CHECK-NEXT:    [[SUB:%.*]] = sub i32 [[TMP10]], [[ADD_1]]
; CHECK-NEXT:    [[RES:%.*]] = add i32 [[ACC:%.*]], [[SUB]]
; CHECK-NEXT:    ret i32 [[RES]]
;
entry:
  %addr.a.1 = getelementptr i16, ptr %a, i32 1
  %addr.b.1 = getelementptr i16, ptr %b, i32 1
  %ld.a.0 = load i16, ptr %a
  %sext.a.0 = sext i16 %ld.a.0 to i32
  %ld.b.0 = load i16, ptr %b
  %ld.a.1 = load i16, ptr %addr.a.1
  %ld.b.1 = load i16, ptr %addr.b.1
  %sext.a.1 = sext i16 %ld.a.1 to i32
  %sext.b.1 = sext i16 %ld.b.1 to i32
  %sext.b.0 = sext i16 %ld.b.0 to i32
  %addr.a.2 = getelementptr i16, ptr %a, i32 2
  %addr.a.3 = getelementptr i16, ptr %a, i32 3
  %ld.a.2 = load i16, ptr %addr.a.2
  %ld.a.3 = load i16, ptr %addr.a.3
  %sext.a.2 = sext i16 %ld.a.2 to i32
  %sext.a.3 = sext i16 %ld.a.3 to i32
  %mul.2 = mul i32 %sext.b.0, %sext.a.3
  %mul.3 = mul i32 %sext.b.1, %sext.a.2
  %mul.0 = mul i32 %sext.a.0, %sext.b.0
  %mul.1 = mul i32 %sext.a.1, %sext.b.1
  %add.1 = add i32 %mul.2, %mul.3
  %add = add i32 %mul.0, %mul.1
  %sub = sub i32 %add, %add.1
  %res = add i32 %acc, %sub
  ret i32 %res
}

; Exchanged pair with the high half of %a multiplied first:
;   %mul.0 = a[1]*b[0], %mul.1 = a[0]*b[1], %add = %mul.0 + %mul.1.
; The assertions expect smladx with the widened %b load as the first operand
; and the widened %a load as the second, accumulating into %acc, and that call
; becomes the return value.
define i32 @exchange_swap(ptr %a, ptr %b, i32 %acc) {
; CHECK-LABEL: @exchange_swap(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A:%.*]], i32 1
; CHECK-NEXT:    [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B:%.*]], i32 1
; CHECK-NEXT:    [[LD_A_0:%.*]] = load i16, ptr [[A]], align 2
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A]], align 2
; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; CHECK-NEXT:    [[TMP3:%.*]] = sext i16 [[TMP2]] to i32
; CHECK-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP1]], 16
; CHECK-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
; CHECK-NEXT:    [[TMP6:%.*]] = sext i16 [[TMP5]] to i32
; CHECK-NEXT:    [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32
; CHECK-NEXT:    [[LD_B_0:%.*]] = load i16, ptr [[B]], align 2
; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[B]], align 2
; CHECK-NEXT:    [[TMP9:%.*]] = trunc i32 [[TMP8]] to i16
; CHECK-NEXT:    [[TMP10:%.*]] = call i32 @llvm.arm.smladx(i32 [[TMP8]], i32 [[TMP1]], i32 [[ACC:%.*]])
; CHECK-NEXT:    [[TMP11:%.*]] = sext i16 [[TMP9]] to i32
; CHECK-NEXT:    [[TMP12:%.*]] = lshr i32 [[TMP8]], 16
; CHECK-NEXT:    [[TMP13:%.*]] = trunc i32 [[TMP12]] to i16
; CHECK-NEXT:    [[TMP14:%.*]] = sext i16 [[TMP13]] to i32
; CHECK-NEXT:    [[LD_A_1:%.*]] = load i16, ptr [[ADDR_A_1]], align 2
; CHECK-NEXT:    [[LD_B_1:%.*]] = load i16, ptr [[ADDR_B_1]], align 2
; CHECK-NEXT:    [[SEXT_A_1:%.*]] = sext i16 [[LD_A_1]] to i32
; CHECK-NEXT:    [[SEXT_B_1:%.*]] = sext i16 [[LD_B_1]] to i32
; CHECK-NEXT:    [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32
; CHECK-NEXT:    [[MUL_0:%.*]] = mul i32 [[TMP6]], [[TMP11]]
; CHECK-NEXT:    [[MUL_1:%.*]] = mul i32 [[TMP3]], [[TMP14]]
; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[MUL_0]], [[MUL_1]]
; CHECK-NEXT:    [[RES:%.*]] = add i32 [[ADD]], [[ACC]]
; CHECK-NEXT:    ret i32 [[TMP10]]
;
entry:
  %addr.a.1 = getelementptr i16, ptr %a, i32 1
  %addr.b.1 = getelementptr i16, ptr %b, i32 1
  %ld.a.0 = load i16, ptr %a
  %sext.a.0 = sext i16 %ld.a.0 to i32
  %ld.b.0 = load i16, ptr %b
  %ld.a.1 = load i16, ptr %addr.a.1
  %ld.b.1 = load i16, ptr %addr.b.1
  %sext.a.1 = sext i16 %ld.a.1 to i32
  %sext.b.1 = sext i16 %ld.b.1 to i32
  %sext.b.0 = sext i16 %ld.b.0 to i32
  %mul.0 = mul i32 %sext.a.1, %sext.b.0
  %mul.1 = mul i32 %sext.a.0, %sext.b.1
  %add = add i32 %mul.0, %mul.1
  %res = add i32 %add, %acc
  ret i32 %res
}

; Same two multiplies as @exchange_swap (a[1]*b[0] and a[0]*b[1]), but the add
; takes %mul.1 before %mul.0. The assertions expect smladx with the widened %a
; load first and the widened %b load second, accumulating into %acc.
define i32 @exchange_swap_2(ptr %a, ptr %b, i32 %acc) {
; CHECK-LABEL: @exchange_swap_2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A:%.*]], i32 1
; CHECK-NEXT:    [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B:%.*]], i32 1
; CHECK-NEXT:    [[LD_A_0:%.*]] = load i16, ptr [[A]], align 2
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A]], align 2
; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; CHECK-NEXT:    [[TMP3:%.*]] = sext i16 [[TMP2]] to i32
; CHECK-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP1]], 16
; CHECK-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
; CHECK-NEXT:    [[TMP6:%.*]] = sext i16 [[TMP5]] to i32
; CHECK-NEXT:    [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32
; CHECK-NEXT:    [[LD_B_0:%.*]] = load i16, ptr [[B]], align 2
; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[B]], align 2
; CHECK-NEXT:    [[TMP9:%.*]] = trunc i32 [[TMP8]] to i16
; CHECK-NEXT:    [[TMP10:%.*]] = call i32 @llvm.arm.smladx(i32 [[TMP1]], i32 [[TMP8]], i32 [[ACC:%.*]])
; CHECK-NEXT:    [[TMP11:%.*]] = sext i16 [[TMP9]] to i32
; CHECK-NEXT:    [[TMP12:%.*]] = lshr i32 [[TMP8]], 16
; CHECK-NEXT:    [[TMP13:%.*]] = trunc i32 [[TMP12]] to i16
; CHECK-NEXT:    [[TMP14:%.*]] = sext i16 [[TMP13]] to i32
; CHECK-NEXT:    [[LD_A_1:%.*]] = load i16, ptr [[ADDR_A_1]], align 2
; CHECK-NEXT:    [[LD_B_1:%.*]] = load i16, ptr [[ADDR_B_1]], align 2
; CHECK-NEXT:    [[SEXT_A_1:%.*]] = sext i16 [[LD_A_1]] to i32
; CHECK-NEXT:    [[SEXT_B_1:%.*]] = sext i16 [[LD_B_1]] to i32
; CHECK-NEXT:    [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32
; CHECK-NEXT:    [[MUL_0:%.*]] = mul i32 [[TMP6]], [[TMP11]]
; CHECK-NEXT:    [[MUL_1:%.*]] = mul i32 [[TMP3]], [[TMP14]]
; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[MUL_1]], [[MUL_0]]
; CHECK-NEXT:    [[RES:%.*]] = add i32 [[ADD]], [[ACC]]
; CHECK-NEXT:    ret i32 [[TMP10]]
;
entry:
  %addr.a.1 = getelementptr i16, ptr %a, i32 1
  %addr.b.1 = getelementptr i16, ptr %b, i32 1
  %ld.a.0 = load i16, ptr %a
  %sext.a.0 = sext i16 %ld.a.0 to i32
  %ld.b.0 = load i16, ptr %b
  %ld.a.1 = load i16, ptr %addr.a.1
  %ld.b.1 = load i16, ptr %addr.b.1
  %sext.a.1 = sext i16 %ld.a.1 to i32
  %sext.b.1 = sext i16 %ld.b.1 to i32
  %sext.b.0 = sext i16 %ld.b.0 to i32
  %mul.0 = mul i32 %sext.a.1, %sext.b.0
  %mul.1 = mul i32 %sext.a.0, %sext.b.1
  %add = add i32 %mul.1, %mul.0
  %res = add i32 %add, %acc
  ret i32 %res
}

; Exchanged pair with the %b value as the first operand of each multiply
; (%mul.0 = b[0]*a[1], %mul.1 = b[1]*a[0]) and the add taking %mul.1 before
; %mul.0. The assertions expect smladx with the widened %a load first and the
; widened %b load second, accumulating into %acc.
define i32 @exchange_swap_3(ptr %a, ptr %b, i32 %acc) {
; CHECK-LABEL: @exchange_swap_3(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A:%.*]], i32 1
; CHECK-NEXT:    [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B:%.*]], i32 1
; CHECK-NEXT:    [[LD_A_0:%.*]] = load i16, ptr [[A]], align 2
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A]], align 2
; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; CHECK-NEXT:    [[TMP3:%.*]] = sext i16 [[TMP2]] to i32
; CHECK-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP1]], 16
; CHECK-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
; CHECK-NEXT:    [[TMP6:%.*]] = sext i16 [[TMP5]] to i32
; CHECK-NEXT:    [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32
; CHECK-NEXT:    [[LD_B_0:%.*]] = load i16, ptr [[B]], align 2
; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[B]], align 2
; CHECK-NEXT:    [[TMP9:%.*]] = trunc i32 [[TMP8]] to i16
; CHECK-NEXT:    [[TMP10:%.*]] = call i32 @llvm.arm.smladx(i32 [[TMP1]], i32 [[TMP8]], i32 [[ACC:%.*]])
; CHECK-NEXT:    [[TMP11:%.*]] = sext i16 [[TMP9]] to i32
; CHECK-NEXT:    [[TMP12:%.*]] = lshr i32 [[TMP8]], 16
; CHECK-NEXT:    [[TMP13:%.*]] = trunc i32 [[TMP12]] to i16
; CHECK-NEXT:    [[TMP14:%.*]] = sext i16 [[TMP13]] to i32
; CHECK-NEXT:    [[LD_A_1:%.*]] = load i16, ptr [[ADDR_A_1]], align 2
; CHECK-NEXT:    [[LD_B_1:%.*]] = load i16, ptr [[ADDR_B_1]], align 2
; CHECK-NEXT:    [[SEXT_A_1:%.*]] = sext i16 [[LD_A_1]] to i32
; CHECK-NEXT:    [[SEXT_B_1:%.*]] = sext i16 [[LD_B_1]] to i32
; CHECK-NEXT:    [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32
; CHECK-NEXT:    [[MUL_0:%.*]] = mul i32 [[TMP11]], [[TMP6]]
; CHECK-NEXT:    [[MUL_1:%.*]] = mul i32 [[TMP14]], [[TMP3]]
; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[MUL_1]], [[MUL_0]]
; CHECK-NEXT:    [[RES:%.*]] = add i32 [[ADD]], [[ACC]]
; CHECK-NEXT:    ret i32 [[TMP10]]
;
entry:
  %addr.a.1 = getelementptr i16, ptr %a, i32 1
  %addr.b.1 = getelementptr i16, ptr %b, i32 1
  %ld.a.0 = load i16, ptr %a
  %sext.a.0 = sext i16 %ld.a.0 to i32
  %ld.b.0 = load i16, ptr %b
  %ld.a.1 = load i16, ptr %addr.a.1
  %ld.b.1 = load i16, ptr %addr.b.1
  %sext.a.1 = sext i16 %ld.a.1 to i32
  %sext.b.1 = sext i16 %ld.b.1 to i32
  %sext.b.0 = sext i16 %ld.b.0 to i32
  %mul.0 = mul i32 %sext.b.0, %sext.a.1
  %mul.1 = mul i32 %sext.b.1, %sext.a.0
  %add = add i32 %mul.1, %mul.0
  %res = add i32 %add, %acc
  ret i32 %res
}