xref: /llvm-project/llvm/test/CodeGen/ARM/ParallelDSP/complex_dot_prod.ll (revision 2501ae58e3bb9a70d279a56d7b3a0ed70a8a852c)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
3; RUN: llc -mtriple=thumbv7em -mcpu=cortex-m4 -O3 %s -o - | FileCheck %s --check-prefix=CHECK-LLC
4; RUN: opt -S -mtriple=armv7-a -arm-parallel-dsp -dce %s -o - | FileCheck %s --check-prefix=CHECK-OPT
5
; complex_dot_prod: input function for the ARM ParallelDSP (-arm-parallel-dsp)
; and Thumb2 codegen checks in this file.
;
; The body reads eight consecutive i16 values from each of %pSrcA and %pSrcB
; (element indices 0..7) and consumes them as four (even, odd) pairs. For each
; pair (a0, a1) taken from %pSrcA and (b0, b1) taken from %pSrcB it accumulates
; two i64 sums:
;   real += b0*a0 - b1*a1
;   imag += b1*a0 + b0*a1
; Each sum is then logically shifted right by 6, truncated to i32, and stored
; to %realResult and %imagResult respectively.
;
; What the autogenerated assertions pin down:
;   * opt run: the i16 loads are widened to i32 loads, the imag chain becomes
;     four chained @llvm.arm.smlaldx calls (accumulator seeded with i64 0), and
;     the real chain stays as scalar mul/sext/add/sub on halves split out of the
;     wide loads with trunc and lshr 16.
;   * llc run: the imag sum is accumulated with smlaldx into r0:r1, the real sum
;     with smulbb/smultt/smlalbb plus subs/sbc pairs, and each 64-bit sum is
;     shifted with lsrs #6 + orr ..., lsl #26 before the str to [r2] / [r3].
6define dso_local arm_aapcscc void @complex_dot_prod(ptr nocapture readonly %pSrcA, ptr nocapture readonly %pSrcB, ptr nocapture %realResult, ptr nocapture %imagResult) {
7; CHECK-LLC-LABEL: complex_dot_prod:
8; CHECK-LLC:       @ %bb.0: @ %entry
9; CHECK-LLC-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
10; CHECK-LLC-NEXT:    ldr.w r12, [r0]
11; CHECK-LLC-NEXT:    ldr r5, [r1]
12; CHECK-LLC-NEXT:    ldr.w lr, [r0, #4]
13; CHECK-LLC-NEXT:    ldr.w r10, [r0, #8]
14; CHECK-LLC-NEXT:    ldr.w r8, [r0, #12]
15; CHECK-LLC-NEXT:    ldr r6, [r1, #4]
16; CHECK-LLC-NEXT:    ldr r7, [r1, #8]
17; CHECK-LLC-NEXT:    ldr.w r9, [r1, #12]
18; CHECK-LLC-NEXT:    movs r0, #0
19; CHECK-LLC-NEXT:    movs r1, #0
20; CHECK-LLC-NEXT:    smlaldx r0, r1, r12, r5
21; CHECK-LLC-NEXT:    smulbb r4, r5, r12
22; CHECK-LLC-NEXT:    smultt r5, r5, r12
23; CHECK-LLC-NEXT:    asr.w r11, r4, #31
24; CHECK-LLC-NEXT:    subs r4, r4, r5
25; CHECK-LLC-NEXT:    sbc.w r5, r11, r5, asr #31
26; CHECK-LLC-NEXT:    smlaldx r0, r1, lr, r6
27; CHECK-LLC-NEXT:    smlalbb r4, r5, r6, lr
28; CHECK-LLC-NEXT:    smultt r6, r6, lr
29; CHECK-LLC-NEXT:    subs r4, r4, r6
30; CHECK-LLC-NEXT:    sbc.w r6, r5, r6, asr #31
31; CHECK-LLC-NEXT:    smlaldx r0, r1, r10, r7
32; CHECK-LLC-NEXT:    smlalbb r4, r6, r7, r10
33; CHECK-LLC-NEXT:    smultt r7, r7, r10
34; CHECK-LLC-NEXT:    subs r5, r4, r7
35; CHECK-LLC-NEXT:    sbc.w r7, r6, r7, asr #31
36; CHECK-LLC-NEXT:    smlalbb r5, r7, r9, r8
37; CHECK-LLC-NEXT:    smultt r6, r9, r8
38; CHECK-LLC-NEXT:    smlaldx r0, r1, r8, r9
39; CHECK-LLC-NEXT:    subs r5, r5, r6
40; CHECK-LLC-NEXT:    sbc.w r7, r7, r6, asr #31
41; CHECK-LLC-NEXT:    lsrs r6, r5, #6
42; CHECK-LLC-NEXT:    lsrs r0, r0, #6
43; CHECK-LLC-NEXT:    orr.w r7, r6, r7, lsl #26
44; CHECK-LLC-NEXT:    orr.w r0, r0, r1, lsl #26
45; CHECK-LLC-NEXT:    str r7, [r2]
46; CHECK-LLC-NEXT:    str r0, [r3]
47; CHECK-LLC-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
48; NOTE(review): this line previously held a directive spelled with the prefix
; "CHECK-LCC" (letters transposed) expecting a pop.w register list without r11.
; Neither RUN line enables that prefix, so it was never evaluated, and its
; register list disagrees with the pop.w expected on the line above. It is kept
; only as this note; do not re-enable it under the llc prefix.
49; CHECK-OPT-LABEL: define dso_local arm_aapcscc void @complex_dot_prod(
50; CHECK-OPT-SAME: ptr nocapture readonly [[PSRCA:%.*]], ptr nocapture readonly [[PSRCB:%.*]], ptr nocapture [[REALRESULT:%.*]], ptr nocapture [[IMAGRESULT:%.*]]) {
51; CHECK-OPT-NEXT:  entry:
52; CHECK-OPT-NEXT:    [[TMP0:%.*]] = load i32, ptr [[PSRCA]], align 2
53; CHECK-OPT-NEXT:    [[TMP1:%.*]] = trunc i32 [[TMP0]] to i16
54; CHECK-OPT-NEXT:    [[TMP2:%.*]] = sext i16 [[TMP1]] to i32
55; CHECK-OPT-NEXT:    [[TMP3:%.*]] = lshr i32 [[TMP0]], 16
56; CHECK-OPT-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
57; CHECK-OPT-NEXT:    [[TMP5:%.*]] = sext i16 [[TMP4]] to i32
58; CHECK-OPT-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds i16, ptr [[PSRCA]], i32 2
59; CHECK-OPT-NEXT:    [[TMP6:%.*]] = load i32, ptr [[PSRCB]], align 2
60; CHECK-OPT-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
61; CHECK-OPT-NEXT:    [[TMP8:%.*]] = call i64 @llvm.arm.smlaldx(i32 [[TMP0]], i32 [[TMP6]], i64 0)
62; CHECK-OPT-NEXT:    [[TMP9:%.*]] = sext i16 [[TMP7]] to i32
63; CHECK-OPT-NEXT:    [[TMP10:%.*]] = lshr i32 [[TMP6]], 16
64; CHECK-OPT-NEXT:    [[TMP11:%.*]] = trunc i32 [[TMP10]] to i16
65; CHECK-OPT-NEXT:    [[TMP12:%.*]] = sext i16 [[TMP11]] to i32
66; CHECK-OPT-NEXT:    [[INCDEC_PTR3:%.*]] = getelementptr inbounds i16, ptr [[PSRCB]], i32 2
67; CHECK-OPT-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP9]], [[TMP2]]
68; CHECK-OPT-NEXT:    [[CONV5:%.*]] = sext i32 [[MUL]] to i64
69; CHECK-OPT-NEXT:    [[MUL13:%.*]] = mul nsw i32 [[TMP12]], [[TMP5]]
70; CHECK-OPT-NEXT:    [[CONV14:%.*]] = sext i32 [[MUL13]] to i64
71; CHECK-OPT-NEXT:    [[SUB:%.*]] = sub nsw i64 [[CONV5]], [[CONV14]]
72; CHECK-OPT-NEXT:    [[TMP13:%.*]] = load i32, ptr [[INCDEC_PTR1]], align 2
73; CHECK-OPT-NEXT:    [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16
74; CHECK-OPT-NEXT:    [[TMP15:%.*]] = sext i16 [[TMP14]] to i32
75; CHECK-OPT-NEXT:    [[TMP16:%.*]] = lshr i32 [[TMP13]], 16
76; CHECK-OPT-NEXT:    [[TMP17:%.*]] = trunc i32 [[TMP16]] to i16
77; CHECK-OPT-NEXT:    [[TMP18:%.*]] = sext i16 [[TMP17]] to i32
78; CHECK-OPT-NEXT:    [[INCDEC_PTR21:%.*]] = getelementptr inbounds i16, ptr [[PSRCA]], i32 4
79; CHECK-OPT-NEXT:    [[TMP19:%.*]] = load i32, ptr [[INCDEC_PTR3]], align 2
80; CHECK-OPT-NEXT:    [[TMP20:%.*]] = trunc i32 [[TMP19]] to i16
81; CHECK-OPT-NEXT:    [[TMP21:%.*]] = call i64 @llvm.arm.smlaldx(i32 [[TMP13]], i32 [[TMP19]], i64 [[TMP8]])
82; CHECK-OPT-NEXT:    [[TMP22:%.*]] = sext i16 [[TMP20]] to i32
83; CHECK-OPT-NEXT:    [[TMP23:%.*]] = lshr i32 [[TMP19]], 16
84; CHECK-OPT-NEXT:    [[TMP24:%.*]] = trunc i32 [[TMP23]] to i16
85; CHECK-OPT-NEXT:    [[TMP25:%.*]] = sext i16 [[TMP24]] to i32
86; CHECK-OPT-NEXT:    [[INCDEC_PTR23:%.*]] = getelementptr inbounds i16, ptr [[PSRCB]], i32 4
87; CHECK-OPT-NEXT:    [[MUL26:%.*]] = mul nsw i32 [[TMP22]], [[TMP15]]
88; CHECK-OPT-NEXT:    [[CONV27:%.*]] = sext i32 [[MUL26]] to i64
89; CHECK-OPT-NEXT:    [[ADD28:%.*]] = add nsw i64 [[SUB]], [[CONV27]]
90; CHECK-OPT-NEXT:    [[MUL36:%.*]] = mul nsw i32 [[TMP25]], [[TMP18]]
91; CHECK-OPT-NEXT:    [[CONV37:%.*]] = sext i32 [[MUL36]] to i64
92; CHECK-OPT-NEXT:    [[SUB38:%.*]] = sub nsw i64 [[ADD28]], [[CONV37]]
93; CHECK-OPT-NEXT:    [[TMP26:%.*]] = load i32, ptr [[INCDEC_PTR21]], align 2
94; CHECK-OPT-NEXT:    [[TMP27:%.*]] = trunc i32 [[TMP26]] to i16
95; CHECK-OPT-NEXT:    [[TMP28:%.*]] = sext i16 [[TMP27]] to i32
96; CHECK-OPT-NEXT:    [[TMP29:%.*]] = lshr i32 [[TMP26]], 16
97; CHECK-OPT-NEXT:    [[TMP30:%.*]] = trunc i32 [[TMP29]] to i16
98; CHECK-OPT-NEXT:    [[TMP31:%.*]] = sext i16 [[TMP30]] to i32
99; CHECK-OPT-NEXT:    [[INCDEC_PTR45:%.*]] = getelementptr inbounds i16, ptr [[PSRCA]], i32 6
100; CHECK-OPT-NEXT:    [[TMP32:%.*]] = load i32, ptr [[INCDEC_PTR23]], align 2
101; CHECK-OPT-NEXT:    [[TMP33:%.*]] = trunc i32 [[TMP32]] to i16
102; CHECK-OPT-NEXT:    [[TMP34:%.*]] = call i64 @llvm.arm.smlaldx(i32 [[TMP26]], i32 [[TMP32]], i64 [[TMP21]])
103; CHECK-OPT-NEXT:    [[TMP35:%.*]] = sext i16 [[TMP33]] to i32
104; CHECK-OPT-NEXT:    [[TMP36:%.*]] = lshr i32 [[TMP32]], 16
105; CHECK-OPT-NEXT:    [[TMP37:%.*]] = trunc i32 [[TMP36]] to i16
106; CHECK-OPT-NEXT:    [[TMP38:%.*]] = sext i16 [[TMP37]] to i32
107; CHECK-OPT-NEXT:    [[INCDEC_PTR47:%.*]] = getelementptr inbounds i16, ptr [[PSRCB]], i32 6
108; CHECK-OPT-NEXT:    [[MUL50:%.*]] = mul nsw i32 [[TMP35]], [[TMP28]]
109; CHECK-OPT-NEXT:    [[CONV51:%.*]] = sext i32 [[MUL50]] to i64
110; CHECK-OPT-NEXT:    [[ADD52:%.*]] = add nsw i64 [[SUB38]], [[CONV51]]
111; CHECK-OPT-NEXT:    [[MUL60:%.*]] = mul nsw i32 [[TMP38]], [[TMP31]]
112; CHECK-OPT-NEXT:    [[CONV61:%.*]] = sext i32 [[MUL60]] to i64
113; CHECK-OPT-NEXT:    [[SUB62:%.*]] = sub nsw i64 [[ADD52]], [[CONV61]]
114; CHECK-OPT-NEXT:    [[TMP39:%.*]] = load i32, ptr [[INCDEC_PTR45]], align 2
115; CHECK-OPT-NEXT:    [[TMP40:%.*]] = trunc i32 [[TMP39]] to i16
116; CHECK-OPT-NEXT:    [[TMP41:%.*]] = sext i16 [[TMP40]] to i32
117; CHECK-OPT-NEXT:    [[TMP42:%.*]] = lshr i32 [[TMP39]], 16
118; CHECK-OPT-NEXT:    [[TMP43:%.*]] = trunc i32 [[TMP42]] to i16
119; CHECK-OPT-NEXT:    [[TMP44:%.*]] = sext i16 [[TMP43]] to i32
120; CHECK-OPT-NEXT:    [[TMP45:%.*]] = load i32, ptr [[INCDEC_PTR47]], align 2
121; CHECK-OPT-NEXT:    [[TMP46:%.*]] = trunc i32 [[TMP45]] to i16
122; CHECK-OPT-NEXT:    [[TMP47:%.*]] = call i64 @llvm.arm.smlaldx(i32 [[TMP39]], i32 [[TMP45]], i64 [[TMP34]])
123; CHECK-OPT-NEXT:    [[TMP48:%.*]] = sext i16 [[TMP46]] to i32
124; CHECK-OPT-NEXT:    [[TMP49:%.*]] = lshr i32 [[TMP45]], 16
125; CHECK-OPT-NEXT:    [[TMP50:%.*]] = trunc i32 [[TMP49]] to i16
126; CHECK-OPT-NEXT:    [[TMP51:%.*]] = sext i16 [[TMP50]] to i32
127; CHECK-OPT-NEXT:    [[MUL74:%.*]] = mul nsw i32 [[TMP48]], [[TMP41]]
128; CHECK-OPT-NEXT:    [[CONV75:%.*]] = sext i32 [[MUL74]] to i64
129; CHECK-OPT-NEXT:    [[ADD76:%.*]] = add nsw i64 [[SUB62]], [[CONV75]]
130; CHECK-OPT-NEXT:    [[MUL84:%.*]] = mul nsw i32 [[TMP51]], [[TMP44]]
131; CHECK-OPT-NEXT:    [[CONV85:%.*]] = sext i32 [[MUL84]] to i64
132; CHECK-OPT-NEXT:    [[SUB86:%.*]] = sub nsw i64 [[ADD76]], [[CONV85]]
133; CHECK-OPT-NEXT:    [[TMP52:%.*]] = lshr i64 [[SUB86]], 6
134; CHECK-OPT-NEXT:    [[CONV92:%.*]] = trunc i64 [[TMP52]] to i32
135; CHECK-OPT-NEXT:    store i32 [[CONV92]], ptr [[REALRESULT]], align 4
136; CHECK-OPT-NEXT:    [[TMP53:%.*]] = lshr i64 [[TMP47]], 6
137; CHECK-OPT-NEXT:    [[CONV94:%.*]] = trunc i64 [[TMP53]] to i32
138; CHECK-OPT-NEXT:    store i32 [[CONV94]], ptr [[IMAGRESULT]], align 4
139; CHECK-OPT-NEXT:    ret void
140;
141entry:
  ; Pair 0: load A[0], A[1] from %pSrcA and B[0], B[1] from %pSrcB (i16 each).
142  %incdec.ptr = getelementptr inbounds i16, ptr %pSrcA, i32 1
143  %0 = load i16, ptr %pSrcA, align 2
144  %incdec.ptr1 = getelementptr inbounds i16, ptr %pSrcA, i32 2
145  %1 = load i16, ptr %incdec.ptr, align 2
146  %incdec.ptr2 = getelementptr inbounds i16, ptr %pSrcB, i32 1
147  %2 = load i16, ptr %pSrcB, align 2
148  %incdec.ptr3 = getelementptr inbounds i16, ptr %pSrcB, i32 2
149  %3 = load i16, ptr %incdec.ptr2, align 2
  ; Seed both i64 accumulators from pair 0:
  ;   %sub   = B[0]*A[0] - B[1]*A[1]   (real chain)
  ;   %add19 = B[1]*A[0] + B[0]*A[1]   (imag chain)
150  %conv = sext i16 %0 to i32
151  %conv4 = sext i16 %2 to i32
152  %mul = mul nsw i32 %conv4, %conv
153  %conv5 = sext i32 %mul to i64
154  %conv7 = sext i16 %3 to i32
155  %mul8 = mul nsw i32 %conv7, %conv
156  %conv9 = sext i32 %mul8 to i64
157  %conv11 = sext i16 %1 to i32
158  %mul13 = mul nsw i32 %conv7, %conv11
159  %conv14 = sext i32 %mul13 to i64
160  %sub = sub nsw i64 %conv5, %conv14
161  %mul17 = mul nsw i32 %conv4, %conv11
162  %conv18 = sext i32 %mul17 to i64
163  %add19 = add nsw i64 %conv9, %conv18
  ; Pair 1: A[2], A[3], B[2], B[3]. The real chain continues into %sub38 and
  ; the imag chain into %add43.
164  %incdec.ptr20 = getelementptr inbounds i16, ptr %pSrcA, i32 3
165  %4 = load i16, ptr %incdec.ptr1, align 2
166  %incdec.ptr21 = getelementptr inbounds i16, ptr %pSrcA, i32 4
167  %5 = load i16, ptr %incdec.ptr20, align 2
168  %incdec.ptr22 = getelementptr inbounds i16, ptr %pSrcB, i32 3
169  %6 = load i16, ptr %incdec.ptr3, align 2
170  %incdec.ptr23 = getelementptr inbounds i16, ptr %pSrcB, i32 4
171  %7 = load i16, ptr %incdec.ptr22, align 2
172  %conv24 = sext i16 %4 to i32
173  %conv25 = sext i16 %6 to i32
174  %mul26 = mul nsw i32 %conv25, %conv24
175  %conv27 = sext i32 %mul26 to i64
176  %add28 = add nsw i64 %sub, %conv27
177  %conv30 = sext i16 %7 to i32
178  %mul31 = mul nsw i32 %conv30, %conv24
179  %conv32 = sext i32 %mul31 to i64
180  %conv34 = sext i16 %5 to i32
181  %mul36 = mul nsw i32 %conv30, %conv34
182  %conv37 = sext i32 %mul36 to i64
183  %sub38 = sub nsw i64 %add28, %conv37
184  %mul41 = mul nsw i32 %conv25, %conv34
185  %conv42 = sext i32 %mul41 to i64
186  %add33 = add nsw i64 %add19, %conv42
187  %add43 = add nsw i64 %add33, %conv32
  ; Pair 2: A[4], A[5], B[4], B[5]. The real chain continues into %sub62 and
  ; the imag chain into %add67.
188  %incdec.ptr44 = getelementptr inbounds i16, ptr %pSrcA, i32 5
189  %8 = load i16, ptr %incdec.ptr21, align 2
190  %incdec.ptr45 = getelementptr inbounds i16, ptr %pSrcA, i32 6
191  %9 = load i16, ptr %incdec.ptr44, align 2
192  %incdec.ptr46 = getelementptr inbounds i16, ptr %pSrcB, i32 5
193  %10 = load i16, ptr %incdec.ptr23, align 2
194  %incdec.ptr47 = getelementptr inbounds i16, ptr %pSrcB, i32 6
195  %11 = load i16, ptr %incdec.ptr46, align 2
196  %conv48 = sext i16 %8 to i32
197  %conv49 = sext i16 %10 to i32
198  %mul50 = mul nsw i32 %conv49, %conv48
199  %conv51 = sext i32 %mul50 to i64
200  %add52 = add nsw i64 %sub38, %conv51
201  %conv54 = sext i16 %11 to i32
202  %mul55 = mul nsw i32 %conv54, %conv48
203  %conv56 = sext i32 %mul55 to i64
204  %conv58 = sext i16 %9 to i32
205  %mul60 = mul nsw i32 %conv54, %conv58
206  %conv61 = sext i32 %mul60 to i64
207  %sub62 = sub nsw i64 %add52, %conv61
208  %mul65 = mul nsw i32 %conv49, %conv58
209  %conv66 = sext i32 %mul65 to i64
210  %add57 = add nsw i64 %add43, %conv66
211  %add67 = add nsw i64 %add57, %conv56
  ; Pair 3: A[6], A[7], B[6], B[7]. The final sums are %sub86 (real) and
  ; %add91 (imag).
212  %incdec.ptr68 = getelementptr inbounds i16, ptr %pSrcA, i32 7
213  %12 = load i16, ptr %incdec.ptr45, align 2
214  %13 = load i16, ptr %incdec.ptr68, align 2
215  %incdec.ptr70 = getelementptr inbounds i16, ptr %pSrcB, i32 7
216  %14 = load i16, ptr %incdec.ptr47, align 2
217  %15 = load i16, ptr %incdec.ptr70, align 2
218  %conv72 = sext i16 %12 to i32
219  %conv73 = sext i16 %14 to i32
220  %mul74 = mul nsw i32 %conv73, %conv72
221  %conv75 = sext i32 %mul74 to i64
222  %add76 = add nsw i64 %sub62, %conv75
223  %conv78 = sext i16 %15 to i32
224  %mul79 = mul nsw i32 %conv78, %conv72
225  %conv80 = sext i32 %mul79 to i64
226  %conv82 = sext i16 %13 to i32
227  %mul84 = mul nsw i32 %conv78, %conv82
228  %conv85 = sext i32 %mul84 to i64
229  %sub86 = sub nsw i64 %add76, %conv85
230  %mul89 = mul nsw i32 %conv73, %conv82
231  %conv90 = sext i32 %mul89 to i64
232  %add81 = add nsw i64 %add67, %conv90
233  %add91 = add nsw i64 %add81, %conv80
  ; Shift each i64 sum right (logical) by 6, keep the low 32 bits, and store:
  ; %sub86 goes to %realResult, %add91 goes to %imagResult.
234  %16 = lshr i64 %sub86, 6
235  %conv92 = trunc i64 %16 to i32
236  store i32 %conv92, ptr %realResult, align 4
237  %17 = lshr i64 %add91, 6
238  %conv94 = trunc i64 %17 to i32
239  store i32 %conv94, ptr %imagResult, align 4
240  ret void
241}
242