xref: /llvm-project/llvm/test/CodeGen/ARM/ParallelDSP/pr43073.ll (revision 701890164d567866900f3087ffd2ad4da963111c)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -mtriple=thumbv7-unknown-linux-gnueabihf -arm-parallel-dsp -dce %s -S -o - | FileCheck %s
3
; first_mul_invalid: an i32 multiply-accumulate chain seeded by an opaque call.
; The chain starts from @bar(in[0], b[0]) and then adds five products
; in[-k] * b[k] for k = 1..5, so %in is indexed downward while %b is indexed
; upward.
; Per the assertions below, the k = 1 product is left as a scalar mul/add on
; top of the call result, while the k = 2,3 and k = 4,5 pairs each become one
; @llvm.arm.smladx call fed by two i32 loads; the second call accumulates onto
; the first.
; NOTE(review): the name suggests the k = 1 multiply is the one the pass must
; reject for pairing - confirm the exact reason against the pass.
4define i32 @first_mul_invalid(ptr nocapture readonly %in, ptr nocapture readonly %b) {
5; CHECK-LABEL: @first_mul_invalid(
6; CHECK-NEXT:  entry:
7; CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr [[IN:%.*]], align 2
8; CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[TMP0]] to i32
9; CHECK-NEXT:    [[TMP1:%.*]] = load i16, ptr [[B:%.*]], align 2
10; CHECK-NEXT:    [[CONV2:%.*]] = sext i16 [[TMP1]] to i32
11; CHECK-NEXT:    [[CALL:%.*]] = tail call i32 @bar(i32 [[CONV]], i32 [[CONV2]])
12; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[IN]], i32 -1
13; CHECK-NEXT:    [[TMP2:%.*]] = load i16, ptr [[ARRAYIDX3]], align 2
14; CHECK-NEXT:    [[CONV4:%.*]] = sext i16 [[TMP2]] to i32
15; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 1
16; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX5]], align 2
17; CHECK-NEXT:    [[CONV6:%.*]] = sext i16 [[TMP3]] to i32
18; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[CONV6]], [[CONV4]]
19; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[MUL]], [[CALL]]
20; CHECK-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds i16, ptr [[IN]], i32 -3
21; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX13]], align 2
22; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 2
23; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX9]], align 2
24; CHECK-NEXT:    [[TMP8:%.*]] = call i32 @llvm.arm.smladx(i32 [[TMP5]], i32 [[TMP7]], i32 [[ADD]])
25; CHECK-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr inbounds i16, ptr [[IN]], i32 -5
26; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX25]], align 2
27; CHECK-NEXT:    [[ARRAYIDX21:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 4
28; CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX21]], align 2
29; CHECK-NEXT:    [[TMP13:%.*]] = call i32 @llvm.arm.smladx(i32 [[TMP10]], i32 [[TMP12]], i32 [[TMP8]])
30; CHECK-NEXT:    ret i32 [[TMP13]]
31;
32entry:
; Seed of the chain: in[0] and b[0] are sign-extended and handed to @bar.
33  %0 = load i16, ptr %in, align 2
34  %conv = sext i16 %0 to i32
35  %1 = load i16, ptr %b, align 2
36  %conv2 = sext i16 %1 to i32
37  %call = tail call i32 @bar(i32 %conv, i32 %conv2)
; Product k = 1: in[-1] * b[1], added directly to the call result.
38  %arrayidx3 = getelementptr inbounds i16, ptr %in, i32 -1
39  %2 = load i16, ptr %arrayidx3, align 2
40  %conv4 = sext i16 %2 to i32
41  %arrayidx5 = getelementptr inbounds i16, ptr %b, i32 1
42  %3 = load i16, ptr %arrayidx5, align 2
43  %conv6 = sext i16 %3 to i32
44  %mul = mul nsw i32 %conv6, %conv4
45  %add = add i32 %mul, %call
; Products k = 2 and k = 3: in[-2] * b[2] and in[-3] * b[3].
46  %arrayidx7 = getelementptr inbounds i16, ptr %in, i32 -2
47  %4 = load i16, ptr %arrayidx7, align 2
48  %conv8 = sext i16 %4 to i32
49  %arrayidx9 = getelementptr inbounds i16, ptr %b, i32 2
50  %5 = load i16, ptr %arrayidx9, align 2
51  %conv10 = sext i16 %5 to i32
52  %mul11 = mul nsw i32 %conv10, %conv8
53  %add12 = add i32 %add, %mul11
54  %arrayidx13 = getelementptr inbounds i16, ptr %in, i32 -3
55  %6 = load i16, ptr %arrayidx13, align 2
56  %conv14 = sext i16 %6 to i32
57  %arrayidx15 = getelementptr inbounds i16, ptr %b, i32 3
58  %7 = load i16, ptr %arrayidx15, align 2
59  %conv16 = sext i16 %7 to i32
60  %mul17 = mul nsw i32 %conv16, %conv14
61  %add18 = add i32 %add12, %mul17
; Products k = 4 and k = 5: in[-4] * b[4] and in[-5] * b[5].
62  %arrayidx19 = getelementptr inbounds i16, ptr %in, i32 -4
63  %8 = load i16, ptr %arrayidx19, align 2
64  %conv20 = sext i16 %8 to i32
65  %arrayidx21 = getelementptr inbounds i16, ptr %b, i32 4
66  %9 = load i16, ptr %arrayidx21, align 2
67  %conv22 = sext i16 %9 to i32
68  %mul23 = mul nsw i32 %conv22, %conv20
69  %add24 = add i32 %add18, %mul23
70  %arrayidx25 = getelementptr inbounds i16, ptr %in, i32 -5
71  %10 = load i16, ptr %arrayidx25, align 2
72  %conv26 = sext i16 %10 to i32
73  %arrayidx27 = getelementptr inbounds i16, ptr %b, i32 5
74  %11 = load i16, ptr %arrayidx27, align 2
75  %conv28 = sext i16 %11 to i32
76  %mul29 = mul nsw i32 %conv28, %conv26
77  %add30 = add i32 %add24, %mul29
78  ret i32 %add30
79}
80
; with_no_acc_input: an i32 multiply-accumulate chain with no separate incoming
; accumulator; the first add combines two products directly.
; Five products in[-k] * b[k] for k = 1..5 are summed, with %in indexed
; downward and %b indexed upward.
; Per the assertions below, the k = 1 product stays a scalar mul and is passed
; as the accumulator operand of the first @llvm.arm.smladx (covering k = 2,3);
; a second smladx (covering k = 4,5) accumulates onto that result.
81define i32 @with_no_acc_input(ptr nocapture readonly %in, ptr nocapture readonly %b) {
82; CHECK-LABEL: @with_no_acc_input(
83; CHECK-NEXT:  entry:
84; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[IN:%.*]], i32 -1
85; CHECK-NEXT:    [[LD_2:%.*]] = load i16, ptr [[ARRAYIDX3]], align 2
86; CHECK-NEXT:    [[CONV4:%.*]] = sext i16 [[LD_2]] to i32
87; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[B:%.*]], i32 1
88; CHECK-NEXT:    [[LD_3:%.*]] = load i16, ptr [[ARRAYIDX5]], align 2
89; CHECK-NEXT:    [[CONV6:%.*]] = sext i16 [[LD_3]] to i32
90; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[CONV6]], [[CONV4]]
91; CHECK-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds i16, ptr [[IN]], i32 -3
92; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX13]], align 2
93; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 2
94; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX9]], align 2
95; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.arm.smladx(i32 [[TMP1]], i32 [[TMP3]], i32 [[MUL]])
96; CHECK-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr inbounds i16, ptr [[IN]], i32 -5
97; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX25]], align 2
98; CHECK-NEXT:    [[ARRAYIDX21:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 4
99; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX21]], align 2
100; CHECK-NEXT:    [[TMP9:%.*]] = call i32 @llvm.arm.smladx(i32 [[TMP6]], i32 [[TMP8]], i32 [[TMP4]])
101; CHECK-NEXT:    ret i32 [[TMP9]]
102;
103entry:
; Product k = 1: in[-1] * b[1]; it is the left operand of the first add below.
104  %arrayidx3 = getelementptr inbounds i16, ptr %in, i32 -1
105  %ld.2 = load i16, ptr %arrayidx3, align 2
106  %conv4 = sext i16 %ld.2 to i32
107  %arrayidx5 = getelementptr inbounds i16, ptr %b, i32 1
108  %ld.3 = load i16, ptr %arrayidx5, align 2
109  %conv6 = sext i16 %ld.3 to i32
110  %mul = mul nsw i32 %conv6, %conv4
; Products k = 2 and k = 3: in[-2] * b[2] and in[-3] * b[3].
111  %arrayidx7 = getelementptr inbounds i16, ptr %in, i32 -2
112  %ld.4 = load i16, ptr %arrayidx7, align 2
113  %conv8 = sext i16 %ld.4 to i32
114  %arrayidx9 = getelementptr inbounds i16, ptr %b, i32 2
115  %ld.5 = load i16, ptr %arrayidx9, align 2
116  %conv10 = sext i16 %ld.5 to i32
117  %mul11 = mul nsw i32 %conv10, %conv8
118  %add12 = add i32 %mul, %mul11
119  %arrayidx13 = getelementptr inbounds i16, ptr %in, i32 -3
120  %ld.6 = load i16, ptr %arrayidx13, align 2
121  %conv14 = sext i16 %ld.6 to i32
122  %arrayidx15 = getelementptr inbounds i16, ptr %b, i32 3
123  %ld.7 = load i16, ptr %arrayidx15, align 2
124  %conv16 = sext i16 %ld.7 to i32
125  %mul17 = mul nsw i32 %conv16, %conv14
126  %add18 = add i32 %add12, %mul17
; Products k = 4 and k = 5: in[-4] * b[4] and in[-5] * b[5].
127  %arrayidx19 = getelementptr inbounds i16, ptr %in, i32 -4
128  %ld.8 = load i16, ptr %arrayidx19, align 2
129  %conv20 = sext i16 %ld.8 to i32
130  %arrayidx21 = getelementptr inbounds i16, ptr %b, i32 4
131  %ld.9 = load i16, ptr %arrayidx21, align 2
132  %conv22 = sext i16 %ld.9 to i32
133  %mul23 = mul nsw i32 %conv22, %conv20
134  %add24 = add i32 %add18, %mul23
135  %arrayidx25 = getelementptr inbounds i16, ptr %in, i32 -5
136  %ld.10 = load i16, ptr %arrayidx25, align 2
137  %conv26 = sext i16 %ld.10 to i32
138  %arrayidx27 = getelementptr inbounds i16, ptr %b, i32 5
139  %ld.11 = load i16, ptr %arrayidx27, align 2
140  %conv28 = sext i16 %ld.11 to i32
141  %mul29 = mul nsw i32 %conv28, %conv26
142  %add30 = add i32 %add24, %mul29
143  ret i32 %add30
144}
145
; with_64bit_acc: a multiply-accumulate chain whose running sum is i64.
; The seed is @bar(in[0], b[0]) sign-extended to i64; each of the five i32
; products in[-k] * b[k] (k = 1..5) is sign-extended to i64 before being added.
; Per the assertions below, the seed and the k = 1 product remain scalar
; (sext + add i64), and the k = 2,3 and k = 4,5 pairs each become one
; @llvm.arm.smlaldx call taking two i32 loads and an i64 accumulator; the second
; call accumulates onto the first.
146define i64 @with_64bit_acc(ptr nocapture readonly %in, ptr nocapture readonly %b) {
147; CHECK-LABEL: @with_64bit_acc(
148; CHECK-NEXT:  entry:
149; CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr [[IN:%.*]], align 2
150; CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[TMP0]] to i32
151; CHECK-NEXT:    [[TMP1:%.*]] = load i16, ptr [[B:%.*]], align 2
152; CHECK-NEXT:    [[CONV2:%.*]] = sext i16 [[TMP1]] to i32
153; CHECK-NEXT:    [[CALL:%.*]] = tail call i32 @bar(i32 [[CONV]], i32 [[CONV2]])
154; CHECK-NEXT:    [[SEXT_0:%.*]] = sext i32 [[CALL]] to i64
155; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[IN]], i32 -1
156; CHECK-NEXT:    [[TMP2:%.*]] = load i16, ptr [[ARRAYIDX3]], align 2
157; CHECK-NEXT:    [[CONV4:%.*]] = sext i16 [[TMP2]] to i32
158; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 1
159; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX5]], align 2
160; CHECK-NEXT:    [[CONV6:%.*]] = sext i16 [[TMP3]] to i32
161; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[CONV6]], [[CONV4]]
162; CHECK-NEXT:    [[SEXT_1:%.*]] = sext i32 [[MUL]] to i64
163; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[SEXT_0]], [[SEXT_1]]
164; CHECK-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds i16, ptr [[IN]], i32 -3
165; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX13]], align 2
166; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 2
167; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX9]], align 2
168; CHECK-NEXT:    [[TMP8:%.*]] = call i64 @llvm.arm.smlaldx(i32 [[TMP5]], i32 [[TMP7]], i64 [[ADD]])
169; CHECK-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr inbounds i16, ptr [[IN]], i32 -5
170; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX25]], align 2
171; CHECK-NEXT:    [[ARRAYIDX21:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 4
172; CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX21]], align 2
173; CHECK-NEXT:    [[TMP13:%.*]] = call i64 @llvm.arm.smlaldx(i32 [[TMP10]], i32 [[TMP12]], i64 [[TMP8]])
174; CHECK-NEXT:    ret i64 [[TMP13]]
175;
176entry:
; Seed of the chain: @bar(in[0], b[0]) widened to i64.
177  %0 = load i16, ptr %in, align 2
178  %conv = sext i16 %0 to i32
179  %1 = load i16, ptr %b, align 2
180  %conv2 = sext i16 %1 to i32
181  %call = tail call i32 @bar(i32 %conv, i32 %conv2)
182  %sext.0 = sext i32 %call to i64
; Product k = 1: in[-1] * b[1], widened and added to the seed.
183  %arrayidx3 = getelementptr inbounds i16, ptr %in, i32 -1
184  %2 = load i16, ptr %arrayidx3, align 2
185  %conv4 = sext i16 %2 to i32
186  %arrayidx5 = getelementptr inbounds i16, ptr %b, i32 1
187  %3 = load i16, ptr %arrayidx5, align 2
188  %conv6 = sext i16 %3 to i32
189  %mul = mul nsw i32 %conv6, %conv4
190  %sext.1 = sext i32 %mul to i64
191  %add = add i64 %sext.0, %sext.1
; Products k = 2 and k = 3: in[-2] * b[2] and in[-3] * b[3].
192  %arrayidx7 = getelementptr inbounds i16, ptr %in, i32 -2
193  %4 = load i16, ptr %arrayidx7, align 2
194  %conv8 = sext i16 %4 to i32
195  %arrayidx9 = getelementptr inbounds i16, ptr %b, i32 2
196  %5 = load i16, ptr %arrayidx9, align 2
197  %conv10 = sext i16 %5 to i32
198  %mul11 = mul nsw i32 %conv10, %conv8
199  %sext.2 = sext i32 %mul11 to i64
200  %add12 = add i64 %add, %sext.2
201  %arrayidx13 = getelementptr inbounds i16, ptr %in, i32 -3
202  %6 = load i16, ptr %arrayidx13, align 2
203  %conv14 = sext i16 %6 to i32
204  %arrayidx15 = getelementptr inbounds i16, ptr %b, i32 3
205  %7 = load i16, ptr %arrayidx15, align 2
206  %conv16 = sext i16 %7 to i32
207  %mul17 = mul nsw i32 %conv16, %conv14
208  %sext.3 = sext i32 %mul17 to i64
209  %add18 = add i64 %add12, %sext.3
; Products k = 4 and k = 5: in[-4] * b[4] and in[-5] * b[5].
210  %arrayidx19 = getelementptr inbounds i16, ptr %in, i32 -4
211  %8 = load i16, ptr %arrayidx19, align 2
212  %conv20 = sext i16 %8 to i32
213  %arrayidx21 = getelementptr inbounds i16, ptr %b, i32 4
214  %9 = load i16, ptr %arrayidx21, align 2
215  %conv22 = sext i16 %9 to i32
216  %mul23 = mul nsw i32 %conv22, %conv20
217  %sext.4 = sext i32 %mul23 to i64
218  %add24 = add i64 %add18, %sext.4
219  %arrayidx25 = getelementptr inbounds i16, ptr %in, i32 -5
220  %10 = load i16, ptr %arrayidx25, align 2
221  %conv26 = sext i16 %10 to i32
222  %arrayidx27 = getelementptr inbounds i16, ptr %b, i32 5
223  %11 = load i16, ptr %arrayidx27, align 2
224  %conv28 = sext i16 %11 to i32
225  %mul29 = mul nsw i32 %conv28, %conv26
226  %sext.5 = sext i32 %mul29 to i64
227  %add30 = add i64 %add24, %sext.5
228  ret i64 %add30
229}
230
; with_64bit_add_acc: an i64 multiply-accumulate chain whose seed is the i32
; argument %acc, sign-extended in the entry block; the products are summed in
; the separate block bb.1.
; Four products px[j] * py[-j] (j = 0..3) are added, each widened to i64 first:
; %px.10756.unr is read at offsets 0..3 and %py.8757.unr at offsets 0..-3.
; Per the assertions below, the j = 0 product keeps its original sext and
; `add nsw i64`, the j = 1 product is added through a sext plus a plain
; `add i64`, and the j = 2,3 pair is replaced by a single @llvm.arm.smlaldx fed
; by i32 loads from px + 2 and py - 3.
231define i64 @with_64bit_add_acc(ptr nocapture readonly %px.10756.unr, ptr nocapture readonly %py.8757.unr, i32 %acc) {
232; CHECK-LABEL: @with_64bit_add_acc(
233; CHECK-NEXT:  entry:
234; CHECK-NEXT:    [[SUM_3758_UNR:%.*]] = sext i32 [[ACC:%.*]] to i64
235; CHECK-NEXT:    br label [[BB_1:%.*]]
236; CHECK:       bb.1:
237; CHECK-NEXT:    [[INCDEC_PTR184_EPIL:%.*]] = getelementptr inbounds i16, ptr [[PX_10756_UNR:%.*]], i32 1
238; CHECK-NEXT:    [[TMP216:%.*]] = load i16, ptr [[PX_10756_UNR]], align 2
239; CHECK-NEXT:    [[CONV185_EPIL:%.*]] = sext i16 [[TMP216]] to i32
240; CHECK-NEXT:    [[INCDEC_PTR186_EPIL:%.*]] = getelementptr inbounds i16, ptr [[PY_8757_UNR:%.*]], i32 -1
241; CHECK-NEXT:    [[TMP217:%.*]] = load i16, ptr [[PY_8757_UNR]], align 2
242; CHECK-NEXT:    [[CONV187_EPIL:%.*]] = sext i16 [[TMP217]] to i32
243; CHECK-NEXT:    [[MUL_EPIL:%.*]] = mul nsw i32 [[CONV187_EPIL]], [[CONV185_EPIL]]
244; CHECK-NEXT:    [[CONV188_EPIL:%.*]] = sext i32 [[MUL_EPIL]] to i64
245; CHECK-NEXT:    [[ADD189_EPIL:%.*]] = add nsw i64 [[SUM_3758_UNR]], [[CONV188_EPIL]]
246; CHECK-NEXT:    [[INCDEC_PTR190_EPIL:%.*]] = getelementptr inbounds i16, ptr [[PX_10756_UNR]], i32 2
247; CHECK-NEXT:    [[TMP218:%.*]] = load i16, ptr [[INCDEC_PTR184_EPIL]], align 2
248; CHECK-NEXT:    [[CONV191_EPIL:%.*]] = sext i16 [[TMP218]] to i32
249; CHECK-NEXT:    [[TMP219:%.*]] = load i16, ptr [[INCDEC_PTR186_EPIL]], align 2
250; CHECK-NEXT:    [[CONV193_EPIL:%.*]] = sext i16 [[TMP219]] to i32
251; CHECK-NEXT:    [[MUL194_EPIL:%.*]] = mul nsw i32 [[CONV193_EPIL]], [[CONV191_EPIL]]
252; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[MUL194_EPIL]] to i64
253; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[TMP0]], [[ADD189_EPIL]]
254; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[INCDEC_PTR190_EPIL]], align 2
255; CHECK-NEXT:    [[INCDEC_PTR199_EPIL:%.*]] = getelementptr inbounds i16, ptr [[PY_8757_UNR]], i32 -3
256; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[INCDEC_PTR199_EPIL]], align 2
257; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.arm.smlaldx(i32 [[TMP5]], i32 [[TMP3]], i64 [[TMP1]])
258; CHECK-NEXT:    ret i64 [[TMP6]]
259;
260entry:
; The accumulator seed is produced here, in a different block from the chain.
261  %sum.3758.unr = sext i32 %acc to i64
262  br label %bb.1
263
264bb.1:
; Product j = 0: px[0] * py[0], added to the widened %acc.
265  %incdec.ptr184.epil = getelementptr inbounds i16, ptr %px.10756.unr, i32 1
266  %tmp216 = load i16, ptr %px.10756.unr, align 2
267  %conv185.epil = sext i16 %tmp216 to i32
268  %incdec.ptr186.epil = getelementptr inbounds i16, ptr %py.8757.unr, i32 -1
269  %tmp217 = load i16, ptr %py.8757.unr, align 2
270  %conv187.epil = sext i16 %tmp217 to i32
271  %mul.epil = mul nsw i32 %conv187.epil, %conv185.epil
272  %conv188.epil = sext i32 %mul.epil to i64
273  %add189.epil = add nsw i64 %sum.3758.unr, %conv188.epil
; Product j = 1: px[1] * py[-1].
274  %incdec.ptr190.epil = getelementptr inbounds i16, ptr %px.10756.unr, i32 2
275  %tmp218 = load i16, ptr %incdec.ptr184.epil, align 2
276  %conv191.epil = sext i16 %tmp218 to i32
277  %incdec.ptr192.epil = getelementptr inbounds i16, ptr %py.8757.unr, i32 -2
278  %tmp219 = load i16, ptr %incdec.ptr186.epil, align 2
279  %conv193.epil = sext i16 %tmp219 to i32
280  %mul194.epil = mul nsw i32 %conv193.epil, %conv191.epil
281  %conv195.epil = sext i32 %mul194.epil to i64
282  %add196.epil = add nsw i64 %add189.epil, %conv195.epil
; Product j = 2: px[2] * py[-2].
283  %incdec.ptr197.epil = getelementptr inbounds i16, ptr %px.10756.unr, i32 3
284  %tmp220 = load i16, ptr %incdec.ptr190.epil, align 2
285  %conv198.epil = sext i16 %tmp220 to i32
286  %incdec.ptr199.epil = getelementptr inbounds i16, ptr %py.8757.unr, i32 -3
287  %tmp221 = load i16, ptr %incdec.ptr192.epil, align 2
288  %conv200.epil = sext i16 %tmp221 to i32
289  %mul201.epil = mul nsw i32 %conv200.epil, %conv198.epil
290  %conv202.epil = sext i32 %mul201.epil to i64
291  %add203.epil = add nsw i64 %add196.epil, %conv202.epil
; Product j = 3: px[3] * py[-3]; its sum is the return value.
292  %tmp222 = load i16, ptr %incdec.ptr197.epil, align 2
293  %conv205.epil = sext i16 %tmp222 to i32
294  %tmp223 = load i16, ptr %incdec.ptr199.epil, align 2
295  %conv207.epil = sext i16 %tmp223 to i32
296  %mul208.epil = mul nsw i32 %conv207.epil, %conv205.epil
297  %conv209.epil = sext i32 %mul208.epil to i64
298  %add210.epil = add nsw i64 %add203.epil, %conv209.epil
299  ret i64 %add210.epil
300}
301
; External function, declared but not defined in this file. Two of the tests
; call it with the sign-extended first elements of their inputs and use the i32
; result as the starting value of their add chain.
302declare dso_local i32 @bar(i32, i32) local_unnamed_addr
303
304