xref: /llvm-project/llvm/test/CodeGen/ARM/ParallelDSP/exchange.ll (revision 0dba791a25e7e520760e0c5127434a0377bc50db)
1; RUN: opt -arm-parallel-dsp -mtriple=armv7-a -S %s -o - | FileCheck %s
2
3; CHECK-LABEL: exchange_1
4; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
5; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
6; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
7; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
8; CHECK: call i32 @llvm.arm.smladx(i32 [[LD_A]], i32 [[LD_B]]
; exchange_1: returns a[0]*b[1] + a[1]*b[0] + acc, where every a[i]/b[i] is
; an i16 load sign-extended to i32. Each product pairs index 0 of one pointer
; with index 1 of the other. The FileCheck directives preceding this function
; look for a single smladx call whose first operand is the widened load of %a.
9define i32 @exchange_1(i16* %a, i16* %b, i32 %acc) {
10entry:
  ; Addresses of element 1 of each array; element 0 is read through %a / %b.
11  %addr.a.1 = getelementptr i16, i16* %a, i32 1
12  %addr.b.1 = getelementptr i16, i16* %b, i32 1
13  %ld.a.0 = load i16, i16* %a
14  %sext.a.0 = sext i16 %ld.a.0 to i32
15  %ld.b.0 = load i16, i16* %b
16  %ld.a.1 = load i16, i16* %addr.a.1
17  %ld.b.1 = load i16, i16* %addr.b.1
18  %sext.a.1 = sext i16 %ld.a.1 to i32
19  %sext.b.1 = sext i16 %ld.b.1 to i32
20  %sext.b.0 = sext i16 %ld.b.0 to i32
  ; mul.0 = a[0] * b[1], mul.1 = a[1] * b[0] (a operand written first).
21  %mul.0 = mul i32 %sext.a.0, %sext.b.1
22  %mul.1 = mul i32 %sext.a.1, %sext.b.0
  ; Products are summed with mul.0 as the left operand, then acc is added.
23  %add = add i32 %mul.0, %mul.1
24  %res = add i32 %add, %acc
25  ret i32 %res
26}
27
28; CHECK-LABEL: exchange_2
29; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
30; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
31; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
32; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
33; CHECK: call i32 @llvm.arm.smladx(i32 [[LD_A]], i32 [[LD_B]]
; exchange_2: returns b[1]*a[0] + b[0]*a[1] + acc on sign-extended i16 loads.
; Same value as exchange_1, but each mul lists its b operand first. The
; FileCheck directives preceding this function still look for
; smladx(<a load>, <b load>, ...).
34define i32 @exchange_2(i16* %a, i16* %b, i32 %acc) {
35entry:
  ; Addresses of element 1 of each array; element 0 is read through %a / %b.
36  %addr.a.1 = getelementptr i16, i16* %a, i32 1
37  %addr.b.1 = getelementptr i16, i16* %b, i32 1
38  %ld.a.0 = load i16, i16* %a
39  %sext.a.0 = sext i16 %ld.a.0 to i32
40  %ld.b.0 = load i16, i16* %b
41  %ld.a.1 = load i16, i16* %addr.a.1
42  %ld.b.1 = load i16, i16* %addr.b.1
43  %sext.a.1 = sext i16 %ld.a.1 to i32
44  %sext.b.1 = sext i16 %ld.b.1 to i32
45  %sext.b.0 = sext i16 %ld.b.0 to i32
  ; mul.0 = b[1] * a[0], mul.1 = b[0] * a[1] (b operand written first).
46  %mul.0 = mul i32 %sext.b.1, %sext.a.0
47  %mul.1 = mul i32 %sext.b.0, %sext.a.1
  ; Products are summed with mul.0 as the left operand, then acc is added.
48  %add = add i32 %mul.0, %mul.1
49  %res = add i32 %add, %acc
50  ret i32 %res
51}
52
53; CHECK-LABEL: exchange_3
54; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
55; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
56; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
57; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
58; CHECK: call i32 @llvm.arm.smladx(i32 [[LD_B]], i32 [[LD_A]]
; exchange_3: returns a[1]*b[0] + a[0]*b[1] + acc on sign-extended i16 loads.
; The muls match exchange_1; only the operand order of the first add is
; reversed (mul.1 before mul.0). The FileCheck directives preceding this
; function look for smladx(<b load>, <a load>, ...).
59define i32 @exchange_3(i16* %a, i16* %b, i32 %acc) {
60entry:
  ; Addresses of element 1 of each array; element 0 is read through %a / %b.
61  %addr.a.1 = getelementptr i16, i16* %a, i32 1
62  %addr.b.1 = getelementptr i16, i16* %b, i32 1
63  %ld.a.0 = load i16, i16* %a
64  %sext.a.0 = sext i16 %ld.a.0 to i32
65  %ld.b.0 = load i16, i16* %b
66  %ld.a.1 = load i16, i16* %addr.a.1
67  %ld.b.1 = load i16, i16* %addr.b.1
68  %sext.a.1 = sext i16 %ld.a.1 to i32
69  %sext.b.1 = sext i16 %ld.b.1 to i32
70  %sext.b.0 = sext i16 %ld.b.0 to i32
  ; mul.0 = a[0] * b[1], mul.1 = a[1] * b[0] (a operand written first).
71  %mul.0 = mul i32 %sext.a.0, %sext.b.1
72  %mul.1 = mul i32 %sext.a.1, %sext.b.0
  ; Note the reversed add operands: mul.1 is the left operand here.
73  %add = add i32 %mul.1, %mul.0
74  %res = add i32 %add, %acc
75  ret i32 %res
76}
77
78; CHECK-LABEL: exchange_4
79; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
80; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
81; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
82; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
83; CHECK: call i32 @llvm.arm.smladx(i32 [[LD_B]], i32 [[LD_A]]
; exchange_4: returns b[0]*a[1] + b[1]*a[0] + acc on sign-extended i16 loads.
; Combines both variations: each mul lists its b operand first, and the first
; add takes mul.1 before mul.0. The FileCheck directives preceding this
; function look for smladx(<b load>, <a load>, ...).
84define i32 @exchange_4(i16* %a, i16* %b, i32 %acc) {
85entry:
  ; Addresses of element 1 of each array; element 0 is read through %a / %b.
86  %addr.a.1 = getelementptr i16, i16* %a, i32 1
87  %addr.b.1 = getelementptr i16, i16* %b, i32 1
88  %ld.a.0 = load i16, i16* %a
89  %sext.a.0 = sext i16 %ld.a.0 to i32
90  %ld.b.0 = load i16, i16* %b
91  %ld.a.1 = load i16, i16* %addr.a.1
92  %ld.b.1 = load i16, i16* %addr.b.1
93  %sext.a.1 = sext i16 %ld.a.1 to i32
94  %sext.b.1 = sext i16 %ld.b.1 to i32
95  %sext.b.0 = sext i16 %ld.b.0 to i32
  ; mul.0 = b[1] * a[0], mul.1 = b[0] * a[1] (b operand written first).
96  %mul.0 = mul i32 %sext.b.1, %sext.a.0
97  %mul.1 = mul i32 %sext.b.0, %sext.a.1
  ; Note the reversed add operands: mul.1 is the left operand here.
98  %add = add i32 %mul.1, %mul.0
99  %res = add i32 %add, %acc
100  ret i32 %res
101}
102
103; CHECK-LABEL: exchange_multi_use_1
104; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
105; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
106; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
107; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
108; CHECK: [[GEP:%[^ ]+]] = getelementptr i16, i16* %a, i32 2
109; CHECK: [[CAST_A_2:%[^ ]+]] = bitcast i16* [[GEP]] to i32*
110; CHECK: [[LD_A_2:%[^ ]+]] = load i32, i32* [[CAST_A_2]]
111; CHECK: [[X:%[^ ]+]] = call i32 @llvm.arm.smladx(i32 [[LD_A]], i32 [[LD_B]], i32 %acc
112; CHECK: call i32 @llvm.arm.smlad(i32 [[LD_A_2]], i32 [[LD_B]], i32 [[X]])
; exchange_multi_use_1: returns
;   (a[0]*b[1] + a[1]*b[0]) + (a[3]*b[1] + a[2]*b[0]) + acc
; on sign-extended i16 loads. The sign-extended b[0] and b[1] values are used
; by both product pairs. The first pair crosses indices; the second pair
; matches a[2] with b[0] and a[3] with b[1]. The FileCheck directives
; preceding this function look for an smladx call feeding an smlad call, both
; taking the same widened load of %b.
113define i32 @exchange_multi_use_1(i16* %a, i16* %b, i32 %acc) {
114entry:
  ; Addresses of element 1 of each array; element 0 is read through %a / %b.
115  %addr.a.1 = getelementptr i16, i16* %a, i32 1
116  %addr.b.1 = getelementptr i16, i16* %b, i32 1
117  %ld.a.0 = load i16, i16* %a
118  %sext.a.0 = sext i16 %ld.a.0 to i32
119  %ld.b.0 = load i16, i16* %b
120  %ld.a.1 = load i16, i16* %addr.a.1
121  %ld.b.1 = load i16, i16* %addr.b.1
122  %sext.a.1 = sext i16 %ld.a.1 to i32
123  %sext.b.1 = sext i16 %ld.b.1 to i32
124  %sext.b.0 = sext i16 %ld.b.0 to i32
  ; First pair (crossed): mul.0 = a[0] * b[1], mul.1 = a[1] * b[0].
125  %mul.0 = mul i32 %sext.a.0, %sext.b.1
126  %mul.1 = mul i32 %sext.a.1, %sext.b.0
127  %add = add i32 %mul.0, %mul.1
  ; Second pair reads a[2] and a[3] and reuses the sign-extended b values.
128  %addr.a.2 = getelementptr i16, i16* %a, i32 2
129  %addr.a.3 = getelementptr i16, i16* %a, i32 3
130  %ld.a.2 = load i16, i16* %addr.a.2
131  %ld.a.3 = load i16, i16* %addr.a.3
132  %sext.a.2 = sext i16 %ld.a.2 to i32
133  %sext.a.3 = sext i16 %ld.a.3 to i32
  ; mul.2 = a[3] * b[1], mul.3 = a[2] * b[0].
134  %mul.2 = mul i32 %sext.a.3, %sext.b.1
135  %mul.3 = mul i32 %sext.a.2, %sext.b.0
136  %add.1 = add i32 %mul.2, %mul.3
  ; Combine both pair sums, then add the accumulator.
137  %add.2 = add i32 %add, %add.1
138  %res = add i32 %add.2, %acc
139  ret i32 %res
140}
141
142; CHECK-LABEL: exchange_multi_use_2
143; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
144; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
145; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
146; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
147; CHECK: [[GEP:%[^ ]+]] = getelementptr i16, i16* %a, i32 2
148; CHECK: [[CAST_A_2:%[^ ]+]] = bitcast i16* [[GEP]] to i32*
149; CHECK: [[LD_A_2:%[^ ]+]] = load i32, i32* [[CAST_A_2]]
150; CHECK: [[X:%[^ ]+]] = call i32 @llvm.arm.smlad(i32 [[LD_A]], i32 [[LD_B]], i32 %acc
151; CHECK: call i32 @llvm.arm.smladx(i32 [[LD_B]], i32 [[LD_A_2]], i32 [[X]])
; exchange_multi_use_2: returns
;   (a[0]*b[0] + a[1]*b[1]) + (b[0]*a[3] + b[1]*a[2]) + acc
; on sign-extended i16 loads. The sign-extended b[0] and b[1] values are used
; by both product pairs. Here the first pair matches indices and the second
; pair crosses them (b[0] with a[3], b[1] with a[2]). The FileCheck directives
; preceding this function look for an smlad call feeding an smladx call.
152define i32 @exchange_multi_use_2(i16* %a, i16* %b, i32 %acc) {
153entry:
  ; Addresses of element 1 of each array; element 0 is read through %a / %b.
154  %addr.a.1 = getelementptr i16, i16* %a, i32 1
155  %addr.b.1 = getelementptr i16, i16* %b, i32 1
156  %ld.a.0 = load i16, i16* %a
157  %sext.a.0 = sext i16 %ld.a.0 to i32
158  %ld.b.0 = load i16, i16* %b
159  %ld.a.1 = load i16, i16* %addr.a.1
160  %ld.b.1 = load i16, i16* %addr.b.1
161  %sext.a.1 = sext i16 %ld.a.1 to i32
162  %sext.b.1 = sext i16 %ld.b.1 to i32
163  %sext.b.0 = sext i16 %ld.b.0 to i32
  ; First pair (matching indices): mul.0 = a[0] * b[0], mul.1 = a[1] * b[1].
164  %mul.0 = mul i32 %sext.a.0, %sext.b.0
165  %mul.1 = mul i32 %sext.a.1, %sext.b.1
166  %add = add i32 %mul.0, %mul.1
  ; Second pair reads a[2] and a[3] and reuses the sign-extended b values.
167  %addr.a.2 = getelementptr i16, i16* %a, i32 2
168  %addr.a.3 = getelementptr i16, i16* %a, i32 3
169  %ld.a.2 = load i16, i16* %addr.a.2
170  %ld.a.3 = load i16, i16* %addr.a.3
171  %sext.a.2 = sext i16 %ld.a.2 to i32
172  %sext.a.3 = sext i16 %ld.a.3 to i32
  ; Crossed: mul.2 = b[0] * a[3], mul.3 = b[1] * a[2] (b operand first).
173  %mul.2 = mul i32 %sext.b.0, %sext.a.3
174  %mul.3 = mul i32 %sext.b.1, %sext.a.2
175  %add.1 = add i32 %mul.2, %mul.3
  ; Combine both pair sums, then add the accumulator.
176  %add.2 = add i32 %add, %add.1
177  %res = add i32 %add.2, %acc
178  ret i32 %res
179}
180
181; TODO: Why aren't two intrinsics generated?
182; CHECK-LABEL: exchange_multi_use_3
183; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
184; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
185; CHECK: [[GEP:%[^ ]+]] = getelementptr i16, i16* %a, i32 2
186; CHECK: [[CAST_A_2:%[^ ]+]] = bitcast i16* [[GEP]] to i32*
187; CHECK: [[LD_A_2:%[^ ]+]] = load i32, i32* [[CAST_A_2]]
188; CHECK-NOT: call i32 @llvm.arm.smlad
189; CHECK: [[X:%[^ ]+]] = call i32 @llvm.arm.smladx(i32 [[LD_B]], i32 [[LD_A_2]], i32 0
; exchange_multi_use_3: returns
;   acc + ((a[0]*b[0] + a[1]*b[1]) - (b[0]*a[3] + b[1]*a[2]))
; on sign-extended i16 loads. The two pair sums are combined with a sub
; rather than an add, and %add (matching-index pair) is defined before %add.1
; (crossed pair). The FileCheck directives preceding this function look for
; only an smladx call with a zero accumulator, with no smlad call before it.
190define i32 @exchange_multi_use_3(i16* %a, i16* %b, i32 %acc) {
191entry:
  ; Addresses of element 1 of each array; element 0 is read through %a / %b.
192  %addr.a.1 = getelementptr i16, i16* %a, i32 1
193  %addr.b.1 = getelementptr i16, i16* %b, i32 1
194  %ld.a.0 = load i16, i16* %a
195  %sext.a.0 = sext i16 %ld.a.0 to i32
196  %ld.b.0 = load i16, i16* %b
197  %ld.a.1 = load i16, i16* %addr.a.1
198  %ld.b.1 = load i16, i16* %addr.b.1
199  %sext.a.1 = sext i16 %ld.a.1 to i32
200  %sext.b.1 = sext i16 %ld.b.1 to i32
201  %sext.b.0 = sext i16 %ld.b.0 to i32
  ; Loads of a[2] and a[3] for the crossed pair.
202  %addr.a.2 = getelementptr i16, i16* %a, i32 2
203  %addr.a.3 = getelementptr i16, i16* %a, i32 3
204  %ld.a.2 = load i16, i16* %addr.a.2
205  %ld.a.3 = load i16, i16* %addr.a.3
206  %sext.a.2 = sext i16 %ld.a.2 to i32
207  %sext.a.3 = sext i16 %ld.a.3 to i32
  ; Crossed pair: mul.2 = b[0] * a[3], mul.3 = b[1] * a[2].
208  %mul.2 = mul i32 %sext.b.0, %sext.a.3
209  %mul.3 = mul i32 %sext.b.1, %sext.a.2
  ; Matching-index pair: mul.0 = a[0] * b[0], mul.1 = a[1] * b[1].
210  %mul.0 = mul i32 %sext.a.0, %sext.b.0
211  %mul.1 = mul i32 %sext.a.1, %sext.b.1
  ; Matching-index sum is emitted first, crossed sum second.
212  %add = add i32 %mul.0, %mul.1
213  %add.1 = add i32 %mul.2, %mul.3
  ; The crossed sum is subtracted from the matching-index sum.
214  %sub = sub i32 %add, %add.1
215  %res = add i32 %acc, %sub
216  ret i32 %res
217}
218
219; TODO: Why isn't smladx generated too?
220; CHECK-LABEL: exchange_multi_use_4
221; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
222; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
223; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
224; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
225; CHECK: [[X:%[^ ]+]] = call i32 @llvm.arm.smlad(i32 [[LD_A]], i32 [[LD_B]], i32 0
226; CHECK-NOT: call i32 @llvm.arm.smlad
; exchange_multi_use_4: returns
;   acc + ((a[0]*b[0] + a[1]*b[1]) - (b[0]*a[3] + b[1]*a[2]))
; on sign-extended i16 loads. Same computation as exchange_multi_use_3, except
; that %add.1 (crossed pair) is defined before %add (matching-index pair). The
; FileCheck directives preceding this function look for only an smlad call
; with a zero accumulator, and no further smlad/smladx call after it.
227define i32 @exchange_multi_use_4(i16* %a, i16* %b, i32 %acc) {
228entry:
  ; Addresses of element 1 of each array; element 0 is read through %a / %b.
229  %addr.a.1 = getelementptr i16, i16* %a, i32 1
230  %addr.b.1 = getelementptr i16, i16* %b, i32 1
231  %ld.a.0 = load i16, i16* %a
232  %sext.a.0 = sext i16 %ld.a.0 to i32
233  %ld.b.0 = load i16, i16* %b
234  %ld.a.1 = load i16, i16* %addr.a.1
235  %ld.b.1 = load i16, i16* %addr.b.1
236  %sext.a.1 = sext i16 %ld.a.1 to i32
237  %sext.b.1 = sext i16 %ld.b.1 to i32
238  %sext.b.0 = sext i16 %ld.b.0 to i32
  ; Loads of a[2] and a[3] for the crossed pair.
239  %addr.a.2 = getelementptr i16, i16* %a, i32 2
240  %addr.a.3 = getelementptr i16, i16* %a, i32 3
241  %ld.a.2 = load i16, i16* %addr.a.2
242  %ld.a.3 = load i16, i16* %addr.a.3
243  %sext.a.2 = sext i16 %ld.a.2 to i32
244  %sext.a.3 = sext i16 %ld.a.3 to i32
  ; Crossed pair: mul.2 = b[0] * a[3], mul.3 = b[1] * a[2].
245  %mul.2 = mul i32 %sext.b.0, %sext.a.3
246  %mul.3 = mul i32 %sext.b.1, %sext.a.2
  ; Matching-index pair: mul.0 = a[0] * b[0], mul.1 = a[1] * b[1].
247  %mul.0 = mul i32 %sext.a.0, %sext.b.0
248  %mul.1 = mul i32 %sext.a.1, %sext.b.1
  ; Crossed sum is emitted first here, matching-index sum second.
249  %add.1 = add i32 %mul.2, %mul.3
250  %add = add i32 %mul.0, %mul.1
  ; The crossed sum is subtracted from the matching-index sum.
251  %sub = sub i32 %add, %add.1
252  %res = add i32 %acc, %sub
253  ret i32 %res
254}
255
256; CHECK-LABEL: exchange_swap
257; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
258; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
259; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
260; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
261; CHECK: call i32 @llvm.arm.smladx(i32 [[LD_B]], i32 [[LD_A]]
; exchange_swap: returns a[1]*b[0] + a[0]*b[1] + acc on sign-extended i16
; loads. Unlike exchange_1, mul.0 is the product that uses a[1] and mul.1 the
; one that uses a[0]. The FileCheck directives preceding this function look
; for smladx(<b load>, <a load>, ...).
262define i32 @exchange_swap(i16* %a, i16* %b, i32 %acc) {
263entry:
  ; Addresses of element 1 of each array; element 0 is read through %a / %b.
264  %addr.a.1 = getelementptr i16, i16* %a, i32 1
265  %addr.b.1 = getelementptr i16, i16* %b, i32 1
266  %ld.a.0 = load i16, i16* %a
267  %sext.a.0 = sext i16 %ld.a.0 to i32
268  %ld.b.0 = load i16, i16* %b
269  %ld.a.1 = load i16, i16* %addr.a.1
270  %ld.b.1 = load i16, i16* %addr.b.1
271  %sext.a.1 = sext i16 %ld.a.1 to i32
272  %sext.b.1 = sext i16 %ld.b.1 to i32
273  %sext.b.0 = sext i16 %ld.b.0 to i32
  ; mul.0 = a[1] * b[0], mul.1 = a[0] * b[1] (a operand written first).
274  %mul.0 = mul i32 %sext.a.1, %sext.b.0
275  %mul.1 = mul i32 %sext.a.0, %sext.b.1
  ; Products are summed with mul.0 as the left operand, then acc is added.
276  %add = add i32 %mul.0, %mul.1
277  %res = add i32 %add, %acc
278  ret i32 %res
279}
280
281; CHECK-LABEL: exchange_swap_2
282; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
283; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
284; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
285; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
286; CHECK: call i32 @llvm.arm.smladx(i32 [[LD_A]], i32 [[LD_B]]
; exchange_swap_2: returns a[0]*b[1] + a[1]*b[0] + acc on sign-extended i16
; loads. The muls match exchange_swap; only the operand order of the first
; add is reversed (mul.1 before mul.0). The FileCheck directives preceding
; this function look for smladx(<a load>, <b load>, ...).
287define i32 @exchange_swap_2(i16* %a, i16* %b, i32 %acc) {
288entry:
  ; Addresses of element 1 of each array; element 0 is read through %a / %b.
289  %addr.a.1 = getelementptr i16, i16* %a, i32 1
290  %addr.b.1 = getelementptr i16, i16* %b, i32 1
291  %ld.a.0 = load i16, i16* %a
292  %sext.a.0 = sext i16 %ld.a.0 to i32
293  %ld.b.0 = load i16, i16* %b
294  %ld.a.1 = load i16, i16* %addr.a.1
295  %ld.b.1 = load i16, i16* %addr.b.1
296  %sext.a.1 = sext i16 %ld.a.1 to i32
297  %sext.b.1 = sext i16 %ld.b.1 to i32
298  %sext.b.0 = sext i16 %ld.b.0 to i32
  ; mul.0 = a[1] * b[0], mul.1 = a[0] * b[1] (a operand written first).
299  %mul.0 = mul i32 %sext.a.1, %sext.b.0
300  %mul.1 = mul i32 %sext.a.0, %sext.b.1
  ; Note the reversed add operands: mul.1 is the left operand here.
301  %add = add i32 %mul.1, %mul.0
302  %res = add i32 %add, %acc
303  ret i32 %res
304}
305
306; CHECK-LABEL: exchange_swap_3
307; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
308; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
309; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
310; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
311; CHECK: call i32 @llvm.arm.smladx(i32 [[LD_A]], i32 [[LD_B]]
; exchange_swap_3: returns b[1]*a[0] + b[0]*a[1] + acc on sign-extended i16
; loads. Like exchange_swap_2 (mul.1 is the left operand of the first add),
; but each mul lists its b operand first. The FileCheck directives preceding
; this function look for smladx(<a load>, <b load>, ...).
312define i32 @exchange_swap_3(i16* %a, i16* %b, i32 %acc) {
313entry:
  ; Addresses of element 1 of each array; element 0 is read through %a / %b.
314  %addr.a.1 = getelementptr i16, i16* %a, i32 1
315  %addr.b.1 = getelementptr i16, i16* %b, i32 1
316  %ld.a.0 = load i16, i16* %a
317  %sext.a.0 = sext i16 %ld.a.0 to i32
318  %ld.b.0 = load i16, i16* %b
319  %ld.a.1 = load i16, i16* %addr.a.1
320  %ld.b.1 = load i16, i16* %addr.b.1
321  %sext.a.1 = sext i16 %ld.a.1 to i32
322  %sext.b.1 = sext i16 %ld.b.1 to i32
323  %sext.b.0 = sext i16 %ld.b.0 to i32
  ; mul.0 = b[0] * a[1], mul.1 = b[1] * a[0] (b operand written first).
324  %mul.0 = mul i32 %sext.b.0, %sext.a.1
325  %mul.1 = mul i32 %sext.b.1, %sext.a.0
  ; Note the reversed add operands: mul.1 is the left operand here.
326  %add = add i32 %mul.1, %mul.0
327  %res = add i32 %add, %acc
328  ret i32 %res
329}
330