; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv7em -mcpu=cortex-m4 -O3 %s -o - | FileCheck %s --check-prefix=CHECK-LLC
; RUN: opt -S -mtriple=armv7-a -arm-parallel-dsp -dce %s -o - | FileCheck %s --check-prefix=CHECK-OPT

; A complex (real/imaginary interleaved i16) dot product over 4 element pairs.
; CHECK-OPT verifies that the ArmParallelDSP pass rewrites the imaginary-part
; sext/mul/add reduction chain into a chain of @llvm.arm.smlaldx intrinsic
; calls; CHECK-LLC verifies that llc for Cortex-M4 selects the paired
; smlaldx / smlalbb / smultt DSP instructions for both accumulators.
;
; NOTE(review): a stale check line with a misspelled prefix ("CHECK-LCC:
; pop.w {r4, r5, r6, r7, r8, r9, r10, pc}") was removed here. FileCheck never
; matched it (the prefix is not registered in any RUN line), and it
; contradicted the live CHECK-LLC-NEXT pop (which includes r11).

define dso_local arm_aapcscc void @complex_dot_prod(ptr nocapture readonly %pSrcA, ptr nocapture readonly %pSrcB, ptr nocapture %realResult, ptr nocapture %imagResult) {
; CHECK-LLC-LABEL: complex_dot_prod:
; CHECK-LLC:       @ %bb.0: @ %entry
; CHECK-LLC-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-LLC-NEXT:    ldr.w r12, [r0]
; CHECK-LLC-NEXT:    ldr r5, [r1]
; CHECK-LLC-NEXT:    ldr.w lr, [r0, #4]
; CHECK-LLC-NEXT:    ldr.w r10, [r0, #8]
; CHECK-LLC-NEXT:    ldr.w r8, [r0, #12]
; CHECK-LLC-NEXT:    ldr r6, [r1, #4]
; CHECK-LLC-NEXT:    ldr r7, [r1, #8]
; CHECK-LLC-NEXT:    ldr.w r9, [r1, #12]
; CHECK-LLC-NEXT:    movs r0, #0
; CHECK-LLC-NEXT:    movs r1, #0
; CHECK-LLC-NEXT:    smlaldx r0, r1, r12, r5
; CHECK-LLC-NEXT:    smulbb r4, r5, r12
; CHECK-LLC-NEXT:    smultt r5, r5, r12
; CHECK-LLC-NEXT:    asr.w r11, r4, #31
; CHECK-LLC-NEXT:    subs r4, r4, r5
; CHECK-LLC-NEXT:    sbc.w r5, r11, r5, asr #31
; CHECK-LLC-NEXT:    smlaldx r0, r1, lr, r6
; CHECK-LLC-NEXT:    smlalbb r4, r5, r6, lr
; CHECK-LLC-NEXT:    smultt r6, r6, lr
; CHECK-LLC-NEXT:    subs r4, r4, r6
; CHECK-LLC-NEXT:    sbc.w r6, r5, r6, asr #31
; CHECK-LLC-NEXT:    smlaldx r0, r1, r10, r7
; CHECK-LLC-NEXT:    smlalbb r4, r6, r7, r10
; CHECK-LLC-NEXT:    smultt r7, r7, r10
; CHECK-LLC-NEXT:    subs r5, r4, r7
; CHECK-LLC-NEXT:    sbc.w r7, r6, r7, asr #31
; CHECK-LLC-NEXT:    smlalbb r5, r7, r9, r8
; CHECK-LLC-NEXT:    smultt r6, r9, r8
; CHECK-LLC-NEXT:    smlaldx r0, r1, r8, r9
; CHECK-LLC-NEXT:    subs r5, r5, r6
; CHECK-LLC-NEXT:    sbc.w r7, r7, r6, asr #31
; CHECK-LLC-NEXT:    lsrs r6, r5, #6
; CHECK-LLC-NEXT:    lsrs r0, r0, #6
; CHECK-LLC-NEXT:    orr.w r7, r6, r7, lsl #26
; CHECK-LLC-NEXT:    orr.w r0, r0, r1, lsl #26
; CHECK-LLC-NEXT:    str r7, [r2]
; CHECK-LLC-NEXT:    str r0, [r3]
; CHECK-LLC-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
; CHECK-OPT-LABEL: define dso_local arm_aapcscc void @complex_dot_prod(
; CHECK-OPT-SAME: ptr nocapture readonly [[PSRCA:%.*]], ptr nocapture readonly [[PSRCB:%.*]], ptr nocapture [[REALRESULT:%.*]], ptr nocapture [[IMAGRESULT:%.*]]) {
; CHECK-OPT-NEXT:  entry:
; CHECK-OPT-NEXT:    [[TMP0:%.*]] = load i32, ptr [[PSRCA]], align 2
; CHECK-OPT-NEXT:    [[TMP1:%.*]] = trunc i32 [[TMP0]] to i16
; CHECK-OPT-NEXT:    [[TMP2:%.*]] = sext i16 [[TMP1]] to i32
; CHECK-OPT-NEXT:    [[TMP3:%.*]] = lshr i32 [[TMP0]], 16
; CHECK-OPT-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; CHECK-OPT-NEXT:    [[TMP5:%.*]] = sext i16 [[TMP4]] to i32
; CHECK-OPT-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds i16, ptr [[PSRCA]], i32 2
; CHECK-OPT-NEXT:    [[TMP6:%.*]] = load i32, ptr [[PSRCB]], align 2
; CHECK-OPT-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
; CHECK-OPT-NEXT:    [[TMP8:%.*]] = call i64 @llvm.arm.smlaldx(i32 [[TMP0]], i32 [[TMP6]], i64 0)
; CHECK-OPT-NEXT:    [[TMP9:%.*]] = sext i16 [[TMP7]] to i32
; CHECK-OPT-NEXT:    [[TMP10:%.*]] = lshr i32 [[TMP6]], 16
; CHECK-OPT-NEXT:    [[TMP11:%.*]] = trunc i32 [[TMP10]] to i16
; CHECK-OPT-NEXT:    [[TMP12:%.*]] = sext i16 [[TMP11]] to i32
; CHECK-OPT-NEXT:    [[INCDEC_PTR3:%.*]] = getelementptr inbounds i16, ptr [[PSRCB]], i32 2
; CHECK-OPT-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP9]], [[TMP2]]
; CHECK-OPT-NEXT:    [[CONV5:%.*]] = sext i32 [[MUL]] to i64
; CHECK-OPT-NEXT:    [[MUL13:%.*]] = mul nsw i32 [[TMP12]], [[TMP5]]
; CHECK-OPT-NEXT:    [[CONV14:%.*]] = sext i32 [[MUL13]] to i64
; CHECK-OPT-NEXT:    [[SUB:%.*]] = sub nsw i64 [[CONV5]], [[CONV14]]
; CHECK-OPT-NEXT:    [[TMP13:%.*]] = load i32, ptr [[INCDEC_PTR1]], align 2
; CHECK-OPT-NEXT:    [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16
; CHECK-OPT-NEXT:    [[TMP15:%.*]] = sext i16 [[TMP14]] to i32
; CHECK-OPT-NEXT:    [[TMP16:%.*]] = lshr i32 [[TMP13]], 16
; CHECK-OPT-NEXT:    [[TMP17:%.*]] = trunc i32 [[TMP16]] to i16
; CHECK-OPT-NEXT:    [[TMP18:%.*]] = sext i16 [[TMP17]] to i32
; CHECK-OPT-NEXT:    [[INCDEC_PTR21:%.*]] = getelementptr inbounds i16, ptr [[PSRCA]], i32 4
; CHECK-OPT-NEXT:    [[TMP19:%.*]] = load i32, ptr [[INCDEC_PTR3]], align 2
; CHECK-OPT-NEXT:    [[TMP20:%.*]] = trunc i32 [[TMP19]] to i16
; CHECK-OPT-NEXT:    [[TMP21:%.*]] = call i64 @llvm.arm.smlaldx(i32 [[TMP13]], i32 [[TMP19]], i64 [[TMP8]])
; CHECK-OPT-NEXT:    [[TMP22:%.*]] = sext i16 [[TMP20]] to i32
; CHECK-OPT-NEXT:    [[TMP23:%.*]] = lshr i32 [[TMP19]], 16
; CHECK-OPT-NEXT:    [[TMP24:%.*]] = trunc i32 [[TMP23]] to i16
; CHECK-OPT-NEXT:    [[TMP25:%.*]] = sext i16 [[TMP24]] to i32
; CHECK-OPT-NEXT:    [[INCDEC_PTR23:%.*]] = getelementptr inbounds i16, ptr [[PSRCB]], i32 4
; CHECK-OPT-NEXT:    [[MUL26:%.*]] = mul nsw i32 [[TMP22]], [[TMP15]]
; CHECK-OPT-NEXT:    [[CONV27:%.*]] = sext i32 [[MUL26]] to i64
; CHECK-OPT-NEXT:    [[ADD28:%.*]] = add nsw i64 [[SUB]], [[CONV27]]
; CHECK-OPT-NEXT:    [[MUL36:%.*]] = mul nsw i32 [[TMP25]], [[TMP18]]
; CHECK-OPT-NEXT:    [[CONV37:%.*]] = sext i32 [[MUL36]] to i64
; CHECK-OPT-NEXT:    [[SUB38:%.*]] = sub nsw i64 [[ADD28]], [[CONV37]]
; CHECK-OPT-NEXT:    [[TMP26:%.*]] = load i32, ptr [[INCDEC_PTR21]], align 2
; CHECK-OPT-NEXT:    [[TMP27:%.*]] = trunc i32 [[TMP26]] to i16
; CHECK-OPT-NEXT:    [[TMP28:%.*]] = sext i16 [[TMP27]] to i32
; CHECK-OPT-NEXT:    [[TMP29:%.*]] = lshr i32 [[TMP26]], 16
; CHECK-OPT-NEXT:    [[TMP30:%.*]] = trunc i32 [[TMP29]] to i16
; CHECK-OPT-NEXT:    [[TMP31:%.*]] = sext i16 [[TMP30]] to i32
; CHECK-OPT-NEXT:    [[INCDEC_PTR45:%.*]] = getelementptr inbounds i16, ptr [[PSRCA]], i32 6
; CHECK-OPT-NEXT:    [[TMP32:%.*]] = load i32, ptr [[INCDEC_PTR23]], align 2
; CHECK-OPT-NEXT:    [[TMP33:%.*]] = trunc i32 [[TMP32]] to i16
; CHECK-OPT-NEXT:    [[TMP34:%.*]] = call i64 @llvm.arm.smlaldx(i32 [[TMP26]], i32 [[TMP32]], i64 [[TMP21]])
; CHECK-OPT-NEXT:    [[TMP35:%.*]] = sext i16 [[TMP33]] to i32
; CHECK-OPT-NEXT:    [[TMP36:%.*]] = lshr i32 [[TMP32]], 16
; CHECK-OPT-NEXT:    [[TMP37:%.*]] = trunc i32 [[TMP36]] to i16
; CHECK-OPT-NEXT:    [[TMP38:%.*]] = sext i16 [[TMP37]] to i32
; CHECK-OPT-NEXT:    [[INCDEC_PTR47:%.*]] = getelementptr inbounds i16, ptr [[PSRCB]], i32 6
; CHECK-OPT-NEXT:    [[MUL50:%.*]] = mul nsw i32 [[TMP35]], [[TMP28]]
; CHECK-OPT-NEXT:    [[CONV51:%.*]] = sext i32 [[MUL50]] to i64
; CHECK-OPT-NEXT:    [[ADD52:%.*]] = add nsw i64 [[SUB38]], [[CONV51]]
; CHECK-OPT-NEXT:    [[MUL60:%.*]] = mul nsw i32 [[TMP38]], [[TMP31]]
; CHECK-OPT-NEXT:    [[CONV61:%.*]] = sext i32 [[MUL60]] to i64
; CHECK-OPT-NEXT:    [[SUB62:%.*]] = sub nsw i64 [[ADD52]], [[CONV61]]
; CHECK-OPT-NEXT:    [[TMP39:%.*]] = load i32, ptr [[INCDEC_PTR45]], align 2
; CHECK-OPT-NEXT:    [[TMP40:%.*]] = trunc i32 [[TMP39]] to i16
; CHECK-OPT-NEXT:    [[TMP41:%.*]] = sext i16 [[TMP40]] to i32
; CHECK-OPT-NEXT:    [[TMP42:%.*]] = lshr i32 [[TMP39]], 16
; CHECK-OPT-NEXT:    [[TMP43:%.*]] = trunc i32 [[TMP42]] to i16
; CHECK-OPT-NEXT:    [[TMP44:%.*]] = sext i16 [[TMP43]] to i32
; CHECK-OPT-NEXT:    [[TMP45:%.*]] = load i32, ptr [[INCDEC_PTR47]], align 2
; CHECK-OPT-NEXT:    [[TMP46:%.*]] = trunc i32 [[TMP45]] to i16
; CHECK-OPT-NEXT:    [[TMP47:%.*]] = call i64 @llvm.arm.smlaldx(i32 [[TMP39]], i32 [[TMP45]], i64 [[TMP34]])
; CHECK-OPT-NEXT:    [[TMP48:%.*]] = sext i16 [[TMP46]] to i32
; CHECK-OPT-NEXT:    [[TMP49:%.*]] = lshr i32 [[TMP45]], 16
; CHECK-OPT-NEXT:    [[TMP50:%.*]] = trunc i32 [[TMP49]] to i16
; CHECK-OPT-NEXT:    [[TMP51:%.*]] = sext i16 [[TMP50]] to i32
; CHECK-OPT-NEXT:    [[MUL74:%.*]] = mul nsw i32 [[TMP48]], [[TMP41]]
; CHECK-OPT-NEXT:    [[CONV75:%.*]] = sext i32 [[MUL74]] to i64
; CHECK-OPT-NEXT:    [[ADD76:%.*]] = add nsw i64 [[SUB62]], [[CONV75]]
; CHECK-OPT-NEXT:    [[MUL84:%.*]] = mul nsw i32 [[TMP51]], [[TMP44]]
; CHECK-OPT-NEXT:    [[CONV85:%.*]] = sext i32 [[MUL84]] to i64
; CHECK-OPT-NEXT:    [[SUB86:%.*]] = sub nsw i64 [[ADD76]], [[CONV85]]
; CHECK-OPT-NEXT:    [[TMP52:%.*]] = lshr i64 [[SUB86]], 6
; CHECK-OPT-NEXT:    [[CONV92:%.*]] = trunc i64 [[TMP52]] to i32
; CHECK-OPT-NEXT:    store i32 [[CONV92]], ptr [[REALRESULT]], align 4
; CHECK-OPT-NEXT:    [[TMP53:%.*]] = lshr i64 [[TMP47]], 6
; CHECK-OPT-NEXT:    [[CONV94:%.*]] = trunc i64 [[TMP53]] to i32
; CHECK-OPT-NEXT:    store i32 [[CONV94]], ptr [[IMAGRESULT]], align 4
; CHECK-OPT-NEXT:    ret void
;
entry:
  %incdec.ptr = getelementptr inbounds i16, ptr %pSrcA, i32 1
  %0 = load i16, ptr %pSrcA, align 2
  %incdec.ptr1 = getelementptr inbounds i16, ptr %pSrcA, i32 2
  %1 = load i16, ptr %incdec.ptr, align 2
  %incdec.ptr2 = getelementptr inbounds i16, ptr %pSrcB, i32 1
  %2 = load i16, ptr %pSrcB, align 2
  %incdec.ptr3 = getelementptr inbounds i16, ptr %pSrcB, i32 2
  %3 = load i16, ptr %incdec.ptr2, align 2
  %conv = sext i16 %0 to i32
  %conv4 = sext i16 %2 to i32
  %mul = mul nsw i32 %conv4, %conv
  %conv5 = sext i32 %mul to i64
  %conv7 = sext i16 %3 to i32
  %mul8 = mul nsw i32 %conv7, %conv
  %conv9 = sext i32 %mul8 to i64
  %conv11 = sext i16 %1 to i32
  %mul13 = mul nsw i32 %conv7, %conv11
  %conv14 = sext i32 %mul13 to i64
  %sub = sub nsw i64 %conv5, %conv14
  %mul17 = mul nsw i32 %conv4, %conv11
  %conv18 = sext i32 %mul17 to i64
  %add19 = add nsw i64 %conv9, %conv18
  %incdec.ptr20 = getelementptr inbounds i16, ptr %pSrcA, i32 3
  %4 = load i16, ptr %incdec.ptr1, align 2
  %incdec.ptr21 = getelementptr inbounds i16, ptr %pSrcA, i32 4
  %5 = load i16, ptr %incdec.ptr20, align 2
  %incdec.ptr22 = getelementptr inbounds i16, ptr %pSrcB, i32 3
  %6 = load i16, ptr %incdec.ptr3, align 2
  %incdec.ptr23 = getelementptr inbounds i16, ptr %pSrcB, i32 4
  %7 = load i16, ptr %incdec.ptr22, align 2
  %conv24 = sext i16 %4 to i32
  %conv25 = sext i16 %6 to i32
  %mul26 = mul nsw i32 %conv25, %conv24
  %conv27 = sext i32 %mul26 to i64
  %add28 = add nsw i64 %sub, %conv27
  %conv30 = sext i16 %7 to i32
  %mul31 = mul nsw i32 %conv30, %conv24
  %conv32 = sext i32 %mul31 to i64
  %conv34 = sext i16 %5 to i32
  %mul36 = mul nsw i32 %conv30, %conv34
  %conv37 = sext i32 %mul36 to i64
  %sub38 = sub nsw i64 %add28, %conv37
  %mul41 = mul nsw i32 %conv25, %conv34
  %conv42 = sext i32 %mul41 to i64
  %add33 = add nsw i64 %add19, %conv42
  %add43 = add nsw i64 %add33, %conv32
  %incdec.ptr44 = getelementptr inbounds i16, ptr %pSrcA, i32 5
  %8 = load i16, ptr %incdec.ptr21, align 2
  %incdec.ptr45 = getelementptr inbounds i16, ptr %pSrcA, i32 6
  %9 = load i16, ptr %incdec.ptr44, align 2
  %incdec.ptr46 = getelementptr inbounds i16, ptr %pSrcB, i32 5
  %10 = load i16, ptr %incdec.ptr23, align 2
  %incdec.ptr47 = getelementptr inbounds i16, ptr %pSrcB, i32 6
  %11 = load i16, ptr %incdec.ptr46, align 2
  %conv48 = sext i16 %8 to i32
  %conv49 = sext i16 %10 to i32
  %mul50 = mul nsw i32 %conv49, %conv48
  %conv51 = sext i32 %mul50 to i64
  %add52 = add nsw i64 %sub38, %conv51
  %conv54 = sext i16 %11 to i32
  %mul55 = mul nsw i32 %conv54, %conv48
  %conv56 = sext i32 %mul55 to i64
  %conv58 = sext i16 %9 to i32
  %mul60 = mul nsw i32 %conv54, %conv58
  %conv61 = sext i32 %mul60 to i64
  %sub62 = sub nsw i64 %add52, %conv61
  %mul65 = mul nsw i32 %conv49, %conv58
  %conv66 = sext i32 %mul65 to i64
  %add57 = add nsw i64 %add43, %conv66
  %add67 = add nsw i64 %add57, %conv56
  %incdec.ptr68 = getelementptr inbounds i16, ptr %pSrcA, i32 7
  %12 = load i16, ptr %incdec.ptr45, align 2
  %13 = load i16, ptr %incdec.ptr68, align 2
  %incdec.ptr70 = getelementptr inbounds i16, ptr %pSrcB, i32 7
  %14 = load i16, ptr %incdec.ptr47, align 2
  %15 = load i16, ptr %incdec.ptr70, align 2
  %conv72 = sext i16 %12 to i32
  %conv73 = sext i16 %14 to i32
  %mul74 = mul nsw i32 %conv73, %conv72
  %conv75 = sext i32 %mul74 to i64
  %add76 = add nsw i64 %sub62, %conv75
  %conv78 = sext i16 %15 to i32
  %mul79 = mul nsw i32 %conv78, %conv72
  %conv80 = sext i32 %mul79 to i64
  %conv82 = sext i16 %13 to i32
  %mul84 = mul nsw i32 %conv78, %conv82
  %conv85 = sext i32 %mul84 to i64
  %sub86 = sub nsw i64 %add76, %conv85
  %mul89 = mul nsw i32 %conv73, %conv82
  %conv90 = sext i32 %mul89 to i64
  %add81 = add nsw i64 %add67, %conv90
  %add91 = add nsw i64 %add81, %conv80
  ; Results are scaled down by 6 bits (>> 6) before being narrowed to i32.
  %16 = lshr i64 %sub86, 6
  %conv92 = trunc i64 %16 to i32
  store i32 %conv92, ptr %realResult, align 4
  %17 = lshr i64 %add91, 6
  %conv94 = trunc i64 %17 to i32
  store i32 %conv94, ptr %imagResult, align 4
  ret void
}