; xref: /llvm-project/llvm/test/CodeGen/ARM/ParallelDSP/overlapping.ll (revision 0dba791a25e7e520760e0c5127434a0377bc50db)
; RUN: opt -arm-parallel-dsp -mtriple=armv7-a -S %s -o - | FileCheck %s

; Test overlap_1: sums a[0]*b[0], a[1]*b[1] (twice) and a[2]*b[2] with %acc.
; %mul.1 is an operand of both %add and %add.1, so the two-multiply groups
; (%mul.0, %mul.1) and (%mul.1, %mul.2) share a product. The patterns below
; expect an smlad call fed by 32-bit loads taken through the element-1 GEPs
; of %a and %b, with %acc as the accumulator operand.
; NOTE(review): the choice of the element-1 pair presumably follows from
; %add.1 being the first operand of %add.2 -- confirm against the pass.
; CHECK-LABEL: overlap_1
; CHECK: [[GEP_A:%[^ ]+]] = getelementptr i16, i16* %a, i32 1
; CHECK: [[GEP_B:%[^ ]+]] = getelementptr i16, i16* %b, i32 1
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* [[GEP_A]] to i32*
; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* [[GEP_B]] to i32*
; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
; CHECK: call i32 @llvm.arm.smlad(i32 [[LD_A]], i32 [[LD_B]], i32 %acc
define i32 @overlap_1(i16* %a, i16* %b, i32 %acc) {
entry:
  %addr.a.1 = getelementptr i16, i16* %a, i32 1
  %addr.b.1 = getelementptr i16, i16* %b, i32 1
  %ld.a.0 = load i16, i16* %a
  %sext.a.0 = sext i16 %ld.a.0 to i32
  %ld.b.0 = load i16, i16* %b
  %ld.a.1 = load i16, i16* %addr.a.1
  %ld.b.1 = load i16, i16* %addr.b.1
  %sext.a.1 = sext i16 %ld.a.1 to i32
  %sext.b.1 = sext i16 %ld.b.1 to i32
  %sext.b.0 = sext i16 %ld.b.0 to i32
  ; a[0] * b[0]
  %mul.0 = mul i32 %sext.a.0, %sext.b.0
  ; a[1] * b[1] -- used by both %add and %add.1 below
  %mul.1 = mul i32 %sext.a.1, %sext.b.1
  %addr.a.2 = getelementptr i16, i16* %a, i32 2
  %addr.b.2 = getelementptr i16, i16* %b, i32 2
  %ld.a.2 = load i16, i16* %addr.a.2
  %ld.b.2 = load i16, i16* %addr.b.2
  %sext.a.2 = sext i16 %ld.a.2 to i32
  %sext.b.2 = sext i16 %ld.b.2 to i32
  ; a[2] * b[2]
  %mul.2 = mul i32 %sext.a.2, %sext.b.2
  %add = add i32 %mul.0, %mul.1
  %add.1 = add i32 %mul.1, %mul.2
  ; %add.1 is the first operand here; overlap_2 uses the opposite order
  %add.2 = add i32 %add.1, %add
  %res = add i32 %add.2, %acc
  ret i32 %res
}

; Test overlap_2: same loads and products as overlap_1, with two operand
; orders changed: %mul.2 multiplies b[2] by a[2] (b first), and %add.2 takes
; %add as its first operand and %add.1 as its second. %mul.1 is still shared
; between %add and %add.1. The patterns below expect an smlad call fed by
; 32-bit loads taken directly through %a and %b (no GEP), with %acc as the
; accumulator operand.
; NOTE(review): the choice of the element-0 pair presumably follows from
; %add being the first operand of %add.2 -- confirm against the pass.
; CHECK-LABEL: overlap_2
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
; CHECK: call i32 @llvm.arm.smlad(i32 [[LD_A]], i32 [[LD_B]], i32 %acc
define i32 @overlap_2(i16* %a, i16* %b, i32 %acc) {
entry:
  %addr.a.1 = getelementptr i16, i16* %a, i32 1
  %addr.b.1 = getelementptr i16, i16* %b, i32 1
  %ld.a.0 = load i16, i16* %a
  %sext.a.0 = sext i16 %ld.a.0 to i32
  %ld.b.0 = load i16, i16* %b
  %ld.a.1 = load i16, i16* %addr.a.1
  %ld.b.1 = load i16, i16* %addr.b.1
  %sext.a.1 = sext i16 %ld.a.1 to i32
  %sext.b.1 = sext i16 %ld.b.1 to i32
  %sext.b.0 = sext i16 %ld.b.0 to i32
  ; a[0] * b[0]
  %mul.0 = mul i32 %sext.a.0, %sext.b.0
  ; a[1] * b[1] -- used by both %add and %add.1 below
  %mul.1 = mul i32 %sext.a.1, %sext.b.1
  %addr.a.2 = getelementptr i16, i16* %a, i32 2
  %addr.b.2 = getelementptr i16, i16* %b, i32 2
  %ld.a.2 = load i16, i16* %addr.a.2
  %ld.b.2 = load i16, i16* %addr.b.2
  %sext.a.2 = sext i16 %ld.a.2 to i32
  %sext.b.2 = sext i16 %ld.b.2 to i32
  ; b[2] * a[2] -- operands deliberately in b, a order
  %mul.2 = mul i32 %sext.b.2, %sext.a.2
  %add = add i32 %mul.0, %mul.1
  %add.1 = add i32 %mul.1, %mul.2
  ; %add is the first operand here; overlap_1 uses the opposite order
  %add.2 = add i32 %add, %add.1
  %res = add i32 %add.2, %acc
  ret i32 %res
}

; Test overlap_3: sums a[0]*b[0], a[1]*b[1], a[2]*b[1] and a[3]*b[2] with
; %acc. The sign-extended b[1] is an operand of both %mul.1 and %mul.2, so
; the b-side inputs of the pairs (%mul.0, %mul.1) and (%mul.2, %mul.3) cover
; elements 0..1 and 1..2 and therefore overlap on element 1.
; The patterns below expect two chained calls: an smlad on the 32-bit loads
; from %a+2 and %b+1 with %acc as accumulator, whose result is the
; accumulator of a second smlad on the 32-bit loads from %a and %b.
; CHECK-LABEL: overlap_3
; CHECK: [[GEP_B:%[^ ]+]] = getelementptr i16, i16* %b, i32 1
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
; CHECK: [[CAST_B_1:%[^ ]+]] = bitcast i16* [[GEP_B]] to i32*
; CHECK: [[LD_B_1:%[^ ]+]] = load i32, i32* [[CAST_B_1]]
; CHECK: [[GEP_A:%[^ ]+]] = getelementptr i16, i16* %a, i32 2
; CHECK: [[CAST_A_2:%[^ ]+]] = bitcast i16* [[GEP_A]] to i32*
; CHECK: [[LD_A_2:%[^ ]+]] = load i32, i32* [[CAST_A_2]]
; CHECK: [[SMLAD:%[^ ]+]] = call i32 @llvm.arm.smlad(i32 [[LD_A_2]], i32 [[LD_B_1]], i32 %acc)
; CHECK: call i32 @llvm.arm.smlad(i32 [[LD_A]], i32 [[LD_B]], i32 [[SMLAD]])
define i32 @overlap_3(i16* %a, i16* %b, i32 %acc) {
entry:
  %addr.a.1 = getelementptr i16, i16* %a, i32 1
  %addr.b.1 = getelementptr i16, i16* %b, i32 1
  %ld.a.0 = load i16, i16* %a
  %sext.a.0 = sext i16 %ld.a.0 to i32
  %ld.b.0 = load i16, i16* %b
  %ld.a.1 = load i16, i16* %addr.a.1
  %ld.b.1 = load i16, i16* %addr.b.1
  %sext.a.1 = sext i16 %ld.a.1 to i32
  %sext.b.1 = sext i16 %ld.b.1 to i32
  %sext.b.0 = sext i16 %ld.b.0 to i32
  ; a[0] * b[0]
  %mul.0 = mul i32 %sext.a.0, %sext.b.0
  ; a[1] * b[1]
  %mul.1 = mul i32 %sext.a.1, %sext.b.1
  %addr.a.2 = getelementptr i16, i16* %a, i32 2
  %addr.b.2 = getelementptr i16, i16* %b, i32 2
  %addr.a.3 = getelementptr i16, i16* %a, i32 3
  %ld.a.2 = load i16, i16* %addr.a.2
  %ld.b.2 = load i16, i16* %addr.b.2
  %ld.a.3 = load i16, i16* %addr.a.3
  %sext.a.2 = sext i16 %ld.a.2 to i32
  %sext.b.2 = sext i16 %ld.b.2 to i32
  %sext.a.3 = sext i16 %ld.a.3 to i32
  ; a[2] * b[1] -- reuses the b[1] value already consumed by %mul.1
  %mul.2 = mul i32 %sext.a.2, %sext.b.1
  ; a[3] * b[2]
  %mul.3 = mul i32 %sext.a.3, %sext.b.2
  %add = add i32 %mul.0, %mul.1
  %add.1 = add i32 %mul.2, %mul.3
  %add.2 = add i32 %add.1, %add
  %res = add i32 %add.2, %acc
  ret i32 %res
}

; Test overlap_4: same loads as overlap_3, but the second pair of products
; is crossed: %mul.2 is b[2]*a[2] and %mul.3 is b[1]*a[3], so a[2] meets the
; higher b element and a[3] the lower one. The sign-extended b[1] is again
; shared, this time between %mul.1 and %mul.3.
; The patterns below expect two chained calls: an smladx (the exchanging
; variant) on the 32-bit loads from %a+2 and %b+1 with %acc as accumulator,
; whose result is the accumulator of an smlad on the 32-bit loads from %a
; and %b.
; CHECK-LABEL: overlap_4
; CHECK: [[GEP_B:%[^ ]+]] = getelementptr i16, i16* %b, i32 1
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
; CHECK: [[CAST_B_1:%[^ ]+]] = bitcast i16* [[GEP_B]] to i32*
; CHECK: [[LD_B_1:%[^ ]+]] = load i32, i32* [[CAST_B_1]]
; CHECK: [[GEP_A:%[^ ]+]] = getelementptr i16, i16* %a, i32 2
; CHECK: [[CAST_A_2:%[^ ]+]] = bitcast i16* [[GEP_A]] to i32*
; CHECK: [[LD_A_2:%[^ ]+]] = load i32, i32* [[CAST_A_2]]
; CHECK: [[SMLAD:%[^ ]+]] = call i32 @llvm.arm.smladx(i32 [[LD_A_2]], i32 [[LD_B_1]], i32 %acc)
; CHECK: call i32 @llvm.arm.smlad(i32 [[LD_A]], i32 [[LD_B]], i32 [[SMLAD]])
define i32 @overlap_4(i16* %a, i16* %b, i32 %acc) {
entry:
  %addr.a.1 = getelementptr i16, i16* %a, i32 1
  %addr.b.1 = getelementptr i16, i16* %b, i32 1
  %ld.a.0 = load i16, i16* %a
  %sext.a.0 = sext i16 %ld.a.0 to i32
  %ld.b.0 = load i16, i16* %b
  %ld.a.1 = load i16, i16* %addr.a.1
  %ld.b.1 = load i16, i16* %addr.b.1
  %sext.a.1 = sext i16 %ld.a.1 to i32
  %sext.b.1 = sext i16 %ld.b.1 to i32
  %sext.b.0 = sext i16 %ld.b.0 to i32
  ; a[0] * b[0]
  %mul.0 = mul i32 %sext.a.0, %sext.b.0
  ; a[1] * b[1]
  %mul.1 = mul i32 %sext.a.1, %sext.b.1
  %addr.a.2 = getelementptr i16, i16* %a, i32 2
  %addr.b.2 = getelementptr i16, i16* %b, i32 2
  %addr.a.3 = getelementptr i16, i16* %a, i32 3
  %ld.a.2 = load i16, i16* %addr.a.2
  %ld.b.2 = load i16, i16* %addr.b.2
  %ld.a.3 = load i16, i16* %addr.a.3
  %sext.a.2 = sext i16 %ld.a.2 to i32
  %sext.b.2 = sext i16 %ld.b.2 to i32
  %sext.a.3 = sext i16 %ld.a.3 to i32
  ; b[2] * a[2] -- lower a element paired with the higher b element
  %mul.2 = mul i32 %sext.b.2, %sext.a.2
  ; b[1] * a[3] -- higher a element paired with the lower b element
  %mul.3 = mul i32 %sext.b.1, %sext.a.3
  %add = add i32 %mul.0, %mul.1
  %add.1 = add i32 %mul.2, %mul.3
  %add.2 = add i32 %add.1, %add
  %res = add i32 %add.2, %acc
  ret i32 %res
}
