xref: /llvm-project/llvm/test/CodeGen/ARM/ParallelDSP/sext-acc.ll (revision 701890164d567866900f3087ffd2ad4da963111c)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -arm-parallel-dsp -dce -mtriple=armv7-a -S %s -o - | FileCheck %s
3
; Test input: a two-element dot product of i16 values plus an i32 accumulator.
; Each i16 element is sign-extended to i32, the pair is multiplied in i32, each
; product is sign-extended to i64, the two products are summed, and the
; sign-extended accumulator is added last.
; Expected output (assertions below): one i32 load from each of %a and %b, the
; accumulator sign-extended to i64, and a single call to @llvm.arm.smlald whose
; result is returned.
4define i64 @sext_acc_1(ptr %a, ptr %b, i32 %acc) {
5; CHECK-LABEL: @sext_acc_1(
6; CHECK-NEXT:  entry:
7; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A:%.*]], align 2
8; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[B:%.*]], align 2
9; CHECK-NEXT:    [[TMP4:%.*]] = sext i32 [[ACC:%.*]] to i64
10; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.arm.smlald(i32 [[TMP1]], i32 [[TMP3]], i64 [[TMP4]])
11; CHECK-NEXT:    ret i64 [[TMP5]]
12;
13entry:
; Element 0: load both i16 operands, sign-extend to i32, multiply.
14  %ld.a.0 = load i16, ptr %a
15  %sext.a.0 = sext i16 %ld.a.0 to i32
16  %ld.b.0 = load i16, ptr %b
17  %sext.b.0 = sext i16 %ld.b.0 to i32
18  %mul.0 = mul i32 %sext.a.0, %sext.b.0
; Element 1: same pattern, one i16 past each base pointer.
19  %addr.a.1 = getelementptr i16, ptr %a, i32 1
20  %addr.b.1 = getelementptr i16, ptr %b, i32 1
21  %ld.a.1 = load i16, ptr %addr.a.1
22  %sext.a.1 = sext i16 %ld.a.1 to i32
23  %ld.b.1 = load i16, ptr %addr.b.1
24  %sext.b.1 = sext i16 %ld.b.1 to i32
25  %mul.1 = mul i32 %sext.a.1, %sext.b.1
; Widen each i32 product to i64 separately, then sum the pair.
26  %sext.mul.0 = sext i32 %mul.0 to i64
27  %sext.mul.1 = sext i32 %mul.1 to i64
28  %add = add i64 %sext.mul.0, %sext.mul.1
; The sign-extended i32 accumulator is the last term added.
29  %sext.acc = sext i32 %acc to i64
30  %res = add i64 %add, %sext.acc
31  ret i64 %res
32}
33
; Test input: a four-element dot product of i16 values plus an i32 accumulator,
; where the accumulator is added to the sum of the first pair (elements 0 and 1)
; before the second pair (elements 2 and 3) is added.
; Every i32 product is sign-extended to i64 individually before being summed.
; Expected output (assertions below): i32 loads from %a, %b, %a+2 and %b+2 (the
; offsets are in i16 units), the accumulator sign-extended to i64, and two
; chained @llvm.arm.smlald calls; the first takes the extended accumulator and
; the second takes the first call's result.
34define i64 @sext_acc_2(ptr %a, ptr %b, i32 %acc) {
35; CHECK-LABEL: @sext_acc_2(
36; CHECK-NEXT:  entry:
37; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A:%.*]], align 2
38; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[B:%.*]], align 2
39; CHECK-NEXT:    [[ADDR_A_2:%.*]] = getelementptr i16, ptr [[A]], i32 2
40; CHECK-NEXT:    [[ADDR_B_2:%.*]] = getelementptr i16, ptr [[B]], i32 2
41; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[ADDR_A_2]], align 2
42; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[ADDR_B_2]], align 2
43; CHECK-NEXT:    [[TMP8:%.*]] = sext i32 [[ACC:%.*]] to i64
44; CHECK-NEXT:    [[TMP9:%.*]] = call i64 @llvm.arm.smlald(i32 [[TMP1]], i32 [[TMP3]], i64 [[TMP8]])
45; CHECK-NEXT:    [[TMP10:%.*]] = call i64 @llvm.arm.smlald(i32 [[TMP5]], i32 [[TMP7]], i64 [[TMP9]])
46; CHECK-NEXT:    ret i64 [[TMP10]]
47;
48entry:
; Element 0: load both i16 operands, sign-extend to i32, multiply.
49  %ld.a.0 = load i16, ptr %a
50  %sext.a.0 = sext i16 %ld.a.0 to i32
51  %ld.b.0 = load i16, ptr %b
52  %sext.b.0 = sext i16 %ld.b.0 to i32
53  %mul.0 = mul i32 %sext.a.0, %sext.b.0
; Element 1.
54  %addr.a.1 = getelementptr i16, ptr %a, i32 1
55  %addr.b.1 = getelementptr i16, ptr %b, i32 1
56  %ld.a.1 = load i16, ptr %addr.a.1
57  %sext.a.1 = sext i16 %ld.a.1 to i32
58  %ld.b.1 = load i16, ptr %addr.b.1
59  %sext.b.1 = sext i16 %ld.b.1 to i32
60  %mul.1 = mul i32 %sext.a.1, %sext.b.1
; Widen and sum the first pair of products.
61  %sext.mul.0 = sext i32 %mul.0 to i64
62  %sext.mul.1 = sext i32 %mul.1 to i64
63  %add = add i64 %sext.mul.0, %sext.mul.1
; The accumulator joins the chain here, before the second pair is computed.
64  %sext.acc = sext i32 %acc to i64
65  %add.1 = add i64 %add, %sext.acc
; Element 2.
66  %addr.a.2 = getelementptr i16, ptr %a, i32 2
67  %addr.b.2 = getelementptr i16, ptr %b, i32 2
68  %ld.a.2 = load i16, ptr %addr.a.2
69  %sext.a.2 = sext i16 %ld.a.2 to i32
70  %ld.b.2 = load i16, ptr %addr.b.2
71  %sext.b.2 = sext i16 %ld.b.2 to i32
72  %mul.2 = mul i32 %sext.a.2, %sext.b.2
73  %sext.mul.2 = sext i32 %mul.2 to i64
; Element 3.
74  %addr.a.3 = getelementptr i16, ptr %a, i32 3
75  %addr.b.3 = getelementptr i16, ptr %b, i32 3
76  %ld.a.3 = load i16, ptr %addr.a.3
77  %sext.a.3 = sext i16 %ld.a.3 to i32
78  %ld.b.3 = load i16, ptr %addr.b.3
79  %sext.b.3 = sext i16 %ld.b.3 to i32
80  %mul.3 = mul i32 %sext.a.3, %sext.b.3
81  %sext.mul.3 = sext i32 %mul.3 to i64
; Sum the second pair, then combine it with (first pair + accumulator).
82  %add.2 = add i64 %sext.mul.2, %sext.mul.3
83  %add.3 = add i64 %add.1, %add.2
84  ret i64 %add.3
85}
86
; Test input: a four-element dot product of i16 values plus an i32 accumulator,
; where the two pair sums are combined first and the sign-extended accumulator
; is the final term added.
; Every i32 product is sign-extended to i64 individually before being summed.
; Expected output (assertions below): i32 loads from %a, %b, %a+2 and %b+2 (the
; offsets are in i16 units), the accumulator sign-extended to i64, and two
; chained @llvm.arm.smlald calls; the first takes the extended accumulator and
; the second takes the first call's result.
87define i64 @sext_acc_3(ptr %a, ptr %b, i32 %acc) {
88; CHECK-LABEL: @sext_acc_3(
89; CHECK-NEXT:  entry:
90; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A:%.*]], align 2
91; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[B:%.*]], align 2
92; CHECK-NEXT:    [[ADDR_A_2:%.*]] = getelementptr i16, ptr [[A]], i32 2
93; CHECK-NEXT:    [[ADDR_B_2:%.*]] = getelementptr i16, ptr [[B]], i32 2
94; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[ADDR_A_2]], align 2
95; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[ADDR_B_2]], align 2
96; CHECK-NEXT:    [[TMP8:%.*]] = sext i32 [[ACC:%.*]] to i64
97; CHECK-NEXT:    [[TMP9:%.*]] = call i64 @llvm.arm.smlald(i32 [[TMP1]], i32 [[TMP3]], i64 [[TMP8]])
98; CHECK-NEXT:    [[TMP10:%.*]] = call i64 @llvm.arm.smlald(i32 [[TMP5]], i32 [[TMP7]], i64 [[TMP9]])
99; CHECK-NEXT:    ret i64 [[TMP10]]
100;
101entry:
; Element 0: load both i16 operands, sign-extend to i32, multiply.
102  %ld.a.0 = load i16, ptr %a
103  %sext.a.0 = sext i16 %ld.a.0 to i32
104  %ld.b.0 = load i16, ptr %b
105  %sext.b.0 = sext i16 %ld.b.0 to i32
106  %mul.0 = mul i32 %sext.a.0, %sext.b.0
; Element 1.
107  %addr.a.1 = getelementptr i16, ptr %a, i32 1
108  %addr.b.1 = getelementptr i16, ptr %b, i32 1
109  %ld.a.1 = load i16, ptr %addr.a.1
110  %sext.a.1 = sext i16 %ld.a.1 to i32
111  %ld.b.1 = load i16, ptr %addr.b.1
112  %sext.b.1 = sext i16 %ld.b.1 to i32
113  %mul.1 = mul i32 %sext.a.1, %sext.b.1
; Widen and sum the first pair of products.
114  %sext.mul.0 = sext i32 %mul.0 to i64
115  %sext.mul.1 = sext i32 %mul.1 to i64
116  %add = add i64 %sext.mul.0, %sext.mul.1
; Element 2.
117  %addr.a.2 = getelementptr i16, ptr %a, i32 2
118  %addr.b.2 = getelementptr i16, ptr %b, i32 2
119  %ld.a.2 = load i16, ptr %addr.a.2
120  %sext.a.2 = sext i16 %ld.a.2 to i32
121  %ld.b.2 = load i16, ptr %addr.b.2
122  %sext.b.2 = sext i16 %ld.b.2 to i32
123  %mul.2 = mul i32 %sext.a.2, %sext.b.2
124  %sext.mul.2 = sext i32 %mul.2 to i64
; Element 3.
125  %addr.a.3 = getelementptr i16, ptr %a, i32 3
126  %addr.b.3 = getelementptr i16, ptr %b, i32 3
127  %ld.a.3 = load i16, ptr %addr.a.3
128  %sext.a.3 = sext i16 %ld.a.3 to i32
129  %ld.b.3 = load i16, ptr %addr.b.3
130  %sext.b.3 = sext i16 %ld.b.3 to i32
131  %mul.3 = mul i32 %sext.a.3, %sext.b.3
132  %sext.mul.3 = sext i32 %mul.3 to i64
; Sum the second pair and combine both pair sums.
133  %add.1 = add i64 %sext.mul.2, %sext.mul.3
134  %add.2 = add i64 %add, %add.1
; The sign-extended accumulator is added only after both pairs are combined.
135  %sext.acc = sext i32 %acc to i64
136  %add.3 = add i64 %add.2, %sext.acc
137  ret i64 %add.3
138}
139
; Test input: a four-element i16 dot product plus an i32 accumulator with mixed
; widening. The first pair of products (elements 0 and 1) is summed in i32 and
; sign-extended to i64 once, whereas @sext_acc_1, @sext_acc_2 and @sext_acc_3
; sign-extend every product separately. The products for elements 2 and 3 are
; sign-extended individually and enter the i64 add chain at different points;
; the sign-extended accumulator is added last.
; Expected output (assertions below): i32 loads from %a, %b, %a+2 and %b+2 (the
; offsets are in i16 units), the accumulator sign-extended to i64, and two
; chained @llvm.arm.smlald calls.
;
; Review note (please confirm): %sext.add is an operand of both %add.1 and
; %add.2 in the input below, so the input adds the first pair's sum twice, while
; the expected output contains exactly one smlald call per pair. If smlald adds
; each pair of products once, the expected output is not equivalent to this
; input. Whether the duplicated operand is intentional cannot be determined
; from this file.
140define i64 @sext_acc_4(ptr %a, ptr %b, i32 %acc) {
141; CHECK-LABEL: @sext_acc_4(
142; CHECK-NEXT:  entry:
143; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A:%.*]], align 2
144; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[B:%.*]], align 2
145; CHECK-NEXT:    [[ADDR_A_2:%.*]] = getelementptr i16, ptr [[A]], i32 2
146; CHECK-NEXT:    [[ADDR_B_2:%.*]] = getelementptr i16, ptr [[B]], i32 2
147; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[ADDR_A_2]], align 2
148; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[ADDR_B_2]], align 2
149; CHECK-NEXT:    [[TMP8:%.*]] = sext i32 [[ACC:%.*]] to i64
150; CHECK-NEXT:    [[TMP9:%.*]] = call i64 @llvm.arm.smlald(i32 [[TMP1]], i32 [[TMP3]], i64 [[TMP8]])
151; CHECK-NEXT:    [[TMP10:%.*]] = call i64 @llvm.arm.smlald(i32 [[TMP5]], i32 [[TMP7]], i64 [[TMP9]])
152; CHECK-NEXT:    ret i64 [[TMP10]]
153;
154entry:
; Element 0: load both i16 operands, sign-extend to i32, multiply.
155  %ld.a.0 = load i16, ptr %a
156  %sext.a.0 = sext i16 %ld.a.0 to i32
157  %ld.b.0 = load i16, ptr %b
158  %sext.b.0 = sext i16 %ld.b.0 to i32
159  %mul.0 = mul i32 %sext.a.0, %sext.b.0
; Element 1.
160  %addr.a.1 = getelementptr i16, ptr %a, i32 1
161  %addr.b.1 = getelementptr i16, ptr %b, i32 1
162  %ld.a.1 = load i16, ptr %addr.a.1
163  %sext.a.1 = sext i16 %ld.a.1 to i32
164  %ld.b.1 = load i16, ptr %addr.b.1
165  %sext.b.1 = sext i16 %ld.b.1 to i32
166  %mul.1 = mul i32 %sext.a.1, %sext.b.1
; First pair is summed in i32 and only then widened to i64.
167  %add = add i32 %mul.0, %mul.1
168  %sext.add = sext i32 %add to i64
; Element 2: product widened to i64 on its own.
169  %addr.a.2 = getelementptr i16, ptr %a, i32 2
170  %addr.b.2 = getelementptr i16, ptr %b, i32 2
171  %ld.a.2 = load i16, ptr %addr.a.2
172  %sext.a.2 = sext i16 %ld.a.2 to i32
173  %ld.b.2 = load i16, ptr %addr.b.2
174  %sext.b.2 = sext i16 %ld.b.2 to i32
175  %mul.2 = mul i32 %sext.a.2, %sext.b.2
176  %sext.mul.2 = sext i32 %mul.2 to i64
; Element 3: product widened to i64 on its own.
177  %addr.a.3 = getelementptr i16, ptr %a, i32 3
178  %addr.b.3 = getelementptr i16, ptr %b, i32 3
179  %ld.a.3 = load i16, ptr %addr.a.3
180  %sext.a.3 = sext i16 %ld.a.3 to i32
181  %ld.b.3 = load i16, ptr %addr.b.3
182  %sext.b.3 = sext i16 %ld.b.3 to i32
183  %mul.3 = mul i32 %sext.a.3, %sext.b.3
184  %sext.mul.3 = sext i32 %mul.3 to i64
185  %sext.acc = sext i32 %acc to i64
; Linear i64 add chain. %sext.add appears in both of the next two adds (see the
; review note above the function); element 3 and the accumulator follow.
186  %add.1 = add i64 %sext.mul.2, %sext.add
187  %add.2 = add i64 %sext.add, %add.1
188  %add.3 = add i64 %add.2, %sext.mul.3
189  %add.4 = add i64 %add.3, %sext.acc
190  ret i64 %add.4
191}
192