xref: /llvm-project/llvm/test/CodeGen/ARM/ParallelDSP/blocks.ll (revision 701890164d567866900f3087ffd2ad4da963111c)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -arm-parallel-dsp -dce -mtriple=armv7-a -S %s -o - | FileCheck %s
3
; single_block: two-element i16 dot product plus accumulator, all in one
; basic block:
;   res = sext(a[0])*sext(b[0]) + sext(a[1])*sext(b[1]) + acc
; Expected output (assertions below): each pair of adjacent i16 loads is
; replaced by a single i32 load from the base pointer, and the whole sum
; becomes one call to @llvm.arm.smlad that takes %acc as its third operand.
4define i32 @single_block(ptr %a, ptr %b, i32 %acc) {
5; CHECK-LABEL: @single_block(
6; CHECK-NEXT:  entry:
7; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A:%.*]], align 2
8; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[B:%.*]], align 2
9; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.arm.smlad(i32 [[TMP1]], i32 [[TMP3]], i32 [[ACC:%.*]])
10; CHECK-NEXT:    ret i32 [[TMP4]]
11;
12entry:
  ; Element 0: a[0] * b[0], each sign-extended from i16 to i32.
13  %ld.a.0 = load i16, ptr %a
14  %sext.a.0 = sext i16 %ld.a.0 to i32
15  %ld.b.0 = load i16, ptr %b
16  %sext.b.0 = sext i16 %ld.b.0 to i32
17  %mul.0 = mul i32 %sext.a.0, %sext.b.0
  ; Element 1: a[1] * b[1], loaded from the i16 slot right after element 0.
18  %addr.a.1 = getelementptr i16, ptr %a, i32 1
19  %addr.b.1 = getelementptr i16, ptr %b, i32 1
20  %ld.a.1 = load i16, ptr %addr.a.1
21  %sext.a.1 = sext i16 %ld.a.1 to i32
22  %ld.b.1 = load i16, ptr %addr.b.1
23  %sext.b.1 = sext i16 %ld.b.1 to i32
24  %mul.1 = mul i32 %sext.a.1, %sext.b.1
  ; Sum the two products, then fold in the incoming accumulator.
25  %add = add i32 %mul.0, %mul.1
26  %res = add i32 %add, %acc
27  ret i32 %res
28}
29
; single_block_64: same two-element i16 dot product as a 32-bit multiply pair,
; but each i32 product is sign-extended to i64 before being summed with the
; i64 accumulator %acc. Everything is in one basic block.
; Expected output (assertions below): one i32 load per pointer and a single
; call to @llvm.arm.smlald that takes %acc as its i64 third operand.
30define i64 @single_block_64(ptr %a, ptr %b, i64 %acc) {
31; CHECK-LABEL: @single_block_64(
32; CHECK-NEXT:  entry:
33; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A:%.*]], align 2
34; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[B:%.*]], align 2
35; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.arm.smlald(i32 [[TMP1]], i32 [[TMP3]], i64 [[ACC:%.*]])
36; CHECK-NEXT:    ret i64 [[TMP4]]
37;
38entry:
  ; Element 0: a[0] * b[0], each sign-extended from i16 to i32.
39  %ld.a.0 = load i16, ptr %a
40  %sext.a.0 = sext i16 %ld.a.0 to i32
41  %ld.b.0 = load i16, ptr %b
42  %sext.b.0 = sext i16 %ld.b.0 to i32
43  %mul.0 = mul i32 %sext.a.0, %sext.b.0
  ; Element 1: a[1] * b[1].
44  %addr.a.1 = getelementptr i16, ptr %a, i32 1
45  %addr.b.1 = getelementptr i16, ptr %b, i32 1
46  %ld.a.1 = load i16, ptr %addr.a.1
47  %sext.a.1 = sext i16 %ld.a.1 to i32
48  %ld.b.1 = load i16, ptr %addr.b.1
49  %sext.b.1 = sext i16 %ld.b.1 to i32
50  %mul.1 = mul i32 %sext.a.1, %sext.b.1
  ; Widen both 32-bit products to i64, then sum and accumulate in 64 bits.
51  %sext.mul.0 = sext i32 %mul.0 to i64
52  %sext.mul.1 = sext i32 %mul.1 to i64
53  %add = add i64 %sext.mul.0, %sext.mul.1
54  %res = add i64 %add, %acc
55  ret i64 %res
56}
57
; multi_block: the two multiplies and their pairwise add live in `entry`, but
; the add of the accumulator %acc lives in the successor block `bb.1`.
; Expected output (assertions below): `entry` still gets one i32 load per
; pointer and a call to @llvm.arm.smlad, but with a constant 0 accumulator;
; the add of %acc stays behind in `bb.1` and consumes the intrinsic's result.
58define i32 @multi_block(ptr %a, ptr %b, i32 %acc) {
59; CHECK-LABEL: @multi_block(
60; CHECK-NEXT:  entry:
61; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A:%.*]], align 2
62; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[B:%.*]], align 2
63; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.arm.smlad(i32 [[TMP1]], i32 [[TMP3]], i32 0)
64; CHECK-NEXT:    br label [[BB_1:%.*]]
65; CHECK:       bb.1:
66; CHECK-NEXT:    [[RES:%.*]] = add i32 [[TMP4]], [[ACC:%.*]]
67; CHECK-NEXT:    ret i32 [[RES]]
68;
69entry:
  ; Element 0: a[0] * b[0], each sign-extended from i16 to i32.
70  %ld.a.0 = load i16, ptr %a
71  %sext.a.0 = sext i16 %ld.a.0 to i32
72  %ld.b.0 = load i16, ptr %b
73  %sext.b.0 = sext i16 %ld.b.0 to i32
74  %mul.0 = mul i32 %sext.a.0, %sext.b.0
  ; Element 1: a[1] * b[1].
75  %addr.a.1 = getelementptr i16, ptr %a, i32 1
76  %addr.b.1 = getelementptr i16, ptr %b, i32 1
77  %ld.a.1 = load i16, ptr %addr.a.1
78  %sext.a.1 = sext i16 %ld.a.1 to i32
79  %ld.b.1 = load i16, ptr %addr.b.1
80  %sext.b.1 = sext i16 %ld.b.1 to i32
81  %mul.1 = mul i32 %sext.a.1, %sext.b.1
  ; The pairwise sum is complete in this block; the accumulate is in bb.1.
82  %add = add i32 %mul.0, %mul.1
83  br label %bb.1
84
85bb.1:
  ; Accumulator add, in a different block from the multiplies.
86  %res = add i32 %add, %acc
87  ret i32 %res
88}
89
; multi_block_64: 64-bit variant of the split-block case. `entry` computes two
; i16*i16 products, sign-extends them to i64 and adds them; the add of the i64
; accumulator %acc lives in the successor block `bb.1`.
; Expected output (assertions below): `entry` gets one i32 load per pointer and
; a call to @llvm.arm.smlald with a constant i64 0 accumulator; the add of
; %acc stays in `bb.1` and consumes the intrinsic's result.
90define i64 @multi_block_64(ptr %a, ptr %b, i64 %acc) {
91; CHECK-LABEL: @multi_block_64(
92; CHECK-NEXT:  entry:
93; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A:%.*]], align 2
94; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[B:%.*]], align 2
95; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.arm.smlald(i32 [[TMP1]], i32 [[TMP3]], i64 0)
96; CHECK-NEXT:    br label [[BB_1:%.*]]
97; CHECK:       bb.1:
98; CHECK-NEXT:    [[RES:%.*]] = add i64 [[TMP4]], [[ACC:%.*]]
99; CHECK-NEXT:    ret i64 [[RES]]
100;
101entry:
  ; Element 0: a[0] * b[0], each sign-extended from i16 to i32.
102  %ld.a.0 = load i16, ptr %a
103  %sext.a.0 = sext i16 %ld.a.0 to i32
104  %ld.b.0 = load i16, ptr %b
105  %sext.b.0 = sext i16 %ld.b.0 to i32
106  %mul.0 = mul i32 %sext.a.0, %sext.b.0
  ; Element 1: a[1] * b[1].
107  %addr.a.1 = getelementptr i16, ptr %a, i32 1
108  %addr.b.1 = getelementptr i16, ptr %b, i32 1
109  %ld.a.1 = load i16, ptr %addr.a.1
110  %sext.a.1 = sext i16 %ld.a.1 to i32
111  %ld.b.1 = load i16, ptr %addr.b.1
112  %sext.b.1 = sext i16 %ld.b.1 to i32
113  %mul.1 = mul i32 %sext.a.1, %sext.b.1
  ; Widen both products to i64 and sum them; the accumulate is in bb.1.
114  %sext.mul.0 = sext i32 %mul.0 to i64
115  %sext.mul.1 = sext i32 %mul.1 to i64
116  %add = add i64 %sext.mul.0, %sext.mul.1
117  br label %bb.1
118
119bb.1:
  ; Accumulator add, in a different block from the multiplies.
120  %res = add i64 %add, %acc
121  ret i64 %res
122}
123
; multi_block_1: the two multiplies are split across blocks. %mul.0 is
; computed in `entry`; %mul.1, the pairwise add and the accumulator add are
; all in `bb.1`.
; Expected output (assertions below): no intrinsic call is produced. The same
; instruction sequence is expected in both blocks, with the i16 loads still
; narrow (printed with an explicit `align 2`).
124define i32 @multi_block_1(ptr %a, ptr %b, i32 %acc) {
125; CHECK-LABEL: @multi_block_1(
126; CHECK-NEXT:  entry:
127; CHECK-NEXT:    [[LD_A_0:%.*]] = load i16, ptr [[A:%.*]], align 2
128; CHECK-NEXT:    [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32
129; CHECK-NEXT:    [[LD_B_0:%.*]] = load i16, ptr [[B:%.*]], align 2
130; CHECK-NEXT:    [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32
131; CHECK-NEXT:    [[MUL_0:%.*]] = mul i32 [[SEXT_A_0]], [[SEXT_B_0]]
132; CHECK-NEXT:    br label [[BB_1:%.*]]
133; CHECK:       bb.1:
134; CHECK-NEXT:    [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A]], i32 1
135; CHECK-NEXT:    [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B]], i32 1
136; CHECK-NEXT:    [[LD_A_1:%.*]] = load i16, ptr [[ADDR_A_1]], align 2
137; CHECK-NEXT:    [[SEXT_A_1:%.*]] = sext i16 [[LD_A_1]] to i32
138; CHECK-NEXT:    [[LD_B_1:%.*]] = load i16, ptr [[ADDR_B_1]], align 2
139; CHECK-NEXT:    [[SEXT_B_1:%.*]] = sext i16 [[LD_B_1]] to i32
140; CHECK-NEXT:    [[MUL_1:%.*]] = mul i32 [[SEXT_A_1]], [[SEXT_B_1]]
141; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[MUL_0]], [[MUL_1]]
142; CHECK-NEXT:    [[RES:%.*]] = add i32 [[ADD]], [[ACC:%.*]]
143; CHECK-NEXT:    ret i32 [[RES]]
144;
145entry:
  ; Only the first product (a[0] * b[0]) is computed in this block.
146  %ld.a.0 = load i16, ptr %a
147  %sext.a.0 = sext i16 %ld.a.0 to i32
148  %ld.b.0 = load i16, ptr %b
149  %sext.b.0 = sext i16 %ld.b.0 to i32
150  %mul.0 = mul i32 %sext.a.0, %sext.b.0
151  br label %bb.1
152
153bb.1:
  ; Second product (a[1] * b[1]) and both adds are in a different block
  ; from %mul.0.
154  %addr.a.1 = getelementptr i16, ptr %a, i32 1
155  %addr.b.1 = getelementptr i16, ptr %b, i32 1
156  %ld.a.1 = load i16, ptr %addr.a.1
157  %sext.a.1 = sext i16 %ld.a.1 to i32
158  %ld.b.1 = load i16, ptr %addr.b.1
159  %sext.b.1 = sext i16 %ld.b.1 to i32
160  %mul.1 = mul i32 %sext.a.1, %sext.b.1
161  %add = add i32 %mul.0, %mul.1
162  %res = add i32 %add, %acc
163  ret i32 %res
164}
165
166; TODO: Four smlads should be generated here, but mul.0 and mul.3 remain as
167; scalars.
; num_load_limit: single-block reduction over eight multiplies built from
; sixteen i16 loads (a[0..7] and b[0..7]), summed through an add tree and then
; added to %acc.
; Expected output (assertions below): three chained @llvm.arm.smlad calls.
; %mul.0 and %mul.3 stay scalar multiplies; they are added to %acc and that
; sum is the accumulator operand of the first smlad. The a[0]/a[1] values they
; need are extracted from the widened i32 load with trunc/lshr/sext.
; NOTE(review): %mul.2 multiplies %sext.a.0 by %sext.b.0 (the same operands as
; %mul.0) and %mul.3 multiplies %sext.a.1 by %sext.b.3, so %ld.a.2, %ld.b.2
; and %ld.a.3 are never used and do not appear in the expected output. This
; looks like a copy-paste slip rather than intent - confirm before relying on
; the "four smlads" TODO, and regenerate the assertions if the operands are
; changed.
; NOTE(review): the function name suggests this sits exactly at the pass's
; load-count limit - confirm against the ARMParallelDSP load-limit option.
168define i32 @num_load_limit(ptr %a, ptr %b, i32 %acc) {
169; CHECK-LABEL: @num_load_limit(
170; CHECK-NEXT:  entry:
171; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A:%.*]], align 2
172; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
173; CHECK-NEXT:    [[TMP3:%.*]] = sext i16 [[TMP2]] to i32
174; CHECK-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP1]], 16
175; CHECK-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
176; CHECK-NEXT:    [[TMP6:%.*]] = sext i16 [[TMP5]] to i32
177; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[B:%.*]], align 2
178; CHECK-NEXT:    [[TMP9:%.*]] = trunc i32 [[TMP8]] to i16
179; CHECK-NEXT:    [[TMP10:%.*]] = sext i16 [[TMP9]] to i32
180; CHECK-NEXT:    [[MUL_0:%.*]] = mul i32 [[TMP3]], [[TMP10]]
181; CHECK-NEXT:    [[ADDR_B_3:%.*]] = getelementptr i16, ptr [[B]], i32 3
182; CHECK-NEXT:    [[LD_B_3:%.*]] = load i16, ptr [[ADDR_B_3]], align 2
183; CHECK-NEXT:    [[SEXT_B_3:%.*]] = sext i16 [[LD_B_3]] to i32
184; CHECK-NEXT:    [[MUL_3:%.*]] = mul i32 [[TMP6]], [[SEXT_B_3]]
185; CHECK-NEXT:    [[TMP11:%.*]] = add i32 [[MUL_3]], [[ACC:%.*]]
186; CHECK-NEXT:    [[TMP12:%.*]] = add i32 [[MUL_0]], [[TMP11]]
187; CHECK-NEXT:    [[TMP13:%.*]] = call i32 @llvm.arm.smlad(i32 [[TMP1]], i32 [[TMP8]], i32 [[TMP12]])
188; CHECK-NEXT:    [[ADDR_A_4:%.*]] = getelementptr i16, ptr [[A]], i32 4
189; CHECK-NEXT:    [[ADDR_B_4:%.*]] = getelementptr i16, ptr [[B]], i32 4
190; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[ADDR_A_4]], align 2
191; CHECK-NEXT:    [[TMP17:%.*]] = load i32, ptr [[ADDR_B_4]], align 2
192; CHECK-NEXT:    [[TMP18:%.*]] = call i32 @llvm.arm.smlad(i32 [[TMP15]], i32 [[TMP17]], i32 [[TMP13]])
193; CHECK-NEXT:    [[ADDR_A_6:%.*]] = getelementptr i16, ptr [[A]], i32 6
194; CHECK-NEXT:    [[ADDR_B_6:%.*]] = getelementptr i16, ptr [[B]], i32 6
195; CHECK-NEXT:    [[TMP20:%.*]] = load i32, ptr [[ADDR_A_6]], align 2
196; CHECK-NEXT:    [[TMP22:%.*]] = load i32, ptr [[ADDR_B_6]], align 2
197; CHECK-NEXT:    [[TMP23:%.*]] = call i32 @llvm.arm.smlad(i32 [[TMP20]], i32 [[TMP22]], i32 [[TMP18]])
198; CHECK-NEXT:    ret i32 [[TMP23]]
199;
200entry:
  ; Elements 0 and 1: a[0]*b[0] + a[1]*b[1].
201  %ld.a.0 = load i16, ptr %a
202  %sext.a.0 = sext i16 %ld.a.0 to i32
203  %ld.b.0 = load i16, ptr %b
204  %sext.b.0 = sext i16 %ld.b.0 to i32
205  %mul.0 = mul i32 %sext.a.0, %sext.b.0
206  %addr.a.1 = getelementptr i16, ptr %a, i32 1
207  %addr.b.1 = getelementptr i16, ptr %b, i32 1
208  %ld.a.1 = load i16, ptr %addr.a.1
209  %sext.a.1 = sext i16 %ld.a.1 to i32
210  %ld.b.1 = load i16, ptr %addr.b.1
211  %sext.b.1 = sext i16 %ld.b.1 to i32
212  %mul.1 = mul i32 %sext.a.1, %sext.b.1
213  %add.0 = add i32 %mul.0, %mul.1
214
  ; Elements 2 and 3 are loaded, but the multiplies below read element 0/1
  ; values instead: %mul.2 = a[0]*b[0] and %mul.3 = a[1]*b[3].
215  %addr.a.2 = getelementptr i16, ptr %a, i32 2
216  %addr.b.2 = getelementptr i16, ptr %b, i32 2
217  %ld.a.2 = load i16, ptr %addr.a.2
218  %sext.a.2 = sext i16 %ld.a.2 to i32
219  %ld.b.2 = load i16, ptr %addr.b.2
220  %sext.b.2 = sext i16 %ld.b.2 to i32
221  %mul.2 = mul i32 %sext.a.0, %sext.b.0
222  %addr.a.3 = getelementptr i16, ptr %a, i32 3
223  %addr.b.3 = getelementptr i16, ptr %b, i32 3
224  %ld.a.3 = load i16, ptr %addr.a.3
225  %sext.a.3 = sext i16 %ld.a.3 to i32
226  %ld.b.3 = load i16, ptr %addr.b.3
227  %sext.b.3 = sext i16 %ld.b.3 to i32
228  %mul.3 = mul i32 %sext.a.1, %sext.b.3
229  %add.3 = add i32 %mul.2, %mul.3
230
  ; Elements 4 and 5: a[4]*b[4] + a[5]*b[5].
231  %addr.a.4 = getelementptr i16, ptr %a, i32 4
232  %addr.b.4 = getelementptr i16, ptr %b, i32 4
233  %ld.a.4 = load i16, ptr %addr.a.4
234  %sext.a.4 = sext i16 %ld.a.4 to i32
235  %ld.b.4 = load i16, ptr %addr.b.4
236  %sext.b.4 = sext i16 %ld.b.4 to i32
237  %mul.4 = mul i32 %sext.a.4, %sext.b.4
238  %addr.a.5 = getelementptr i16, ptr %a, i32 5
239  %addr.b.5 = getelementptr i16, ptr %b, i32 5
240  %ld.a.5 = load i16, ptr %addr.a.5
241  %sext.a.5 = sext i16 %ld.a.5 to i32
242  %ld.b.5 = load i16, ptr %addr.b.5
243  %sext.b.5 = sext i16 %ld.b.5 to i32
244  %mul.5 = mul i32 %sext.a.5, %sext.b.5
245  %add.5 = add i32 %mul.4, %mul.5
246
  ; Elements 6 and 7: a[6]*b[6] + a[7]*b[7].
247  %addr.a.6 = getelementptr i16, ptr %a, i32 6
248  %addr.b.6 = getelementptr i16, ptr %b, i32 6
249  %ld.a.6 = load i16, ptr %addr.a.6
250  %sext.a.6 = sext i16 %ld.a.6 to i32
251  %ld.b.6 = load i16, ptr %addr.b.6
252  %sext.b.6 = sext i16 %ld.b.6 to i32
253  %mul.6 = mul i32 %sext.a.6, %sext.b.6
254  %addr.a.7 = getelementptr i16, ptr %a, i32 7
255  %addr.b.7 = getelementptr i16, ptr %b, i32 7
256  %ld.a.7 = load i16, ptr %addr.a.7
257  %sext.a.7 = sext i16 %ld.a.7 to i32
258  %ld.b.7 = load i16, ptr %addr.b.7
259  %sext.b.7 = sext i16 %ld.b.7 to i32
260  %mul.7 = mul i32 %sext.a.7, %sext.b.7
261  %add.7 = add i32 %mul.6, %mul.7
262
  ; Add tree combining the four pairwise sums, then the accumulator.
263  %add.10 = add i32 %add.7, %add.5
264  %add.11 = add i32 %add.3, %add.0
265  %add.12 = add i32 %add.10, %add.11
266  %res = add i32 %add.12, %acc
267  ret i32 %res
268}
269
; too_many_loads: single-block reduction over nine multiplies built from
; eighteen i16 loads (the `.0` through `.8` groups on both %a and %b), summed
; through an add tree together with %acc.
; Expected output (assertions below): no intrinsic call is produced; every
; multiply and add is expected as a scalar instruction and the i16 loads stay
; narrow. The unused %ld.a.2, %ld.b.2 and %ld.a.3 chains do not appear in the
; expected output (the RUN line also runs -dce).
; NOTE(review): presumably the transformation is skipped because of the number
; of loads, as the function name suggests - confirm against the
; ARMParallelDSP load-limit option.
; NOTE(review): %mul.2 reuses %sext.a.0/%sext.b.0 and %mul.3 pairs %sext.a.1
; with %sext.b.3; %addr.a.8/%addr.b.8 use index 7, the same element as the
; `.7` group. These look like copy-paste slips - confirm, and regenerate the
; assertions if any operand is changed.
270define i32 @too_many_loads(ptr %a, ptr %b, i32 %acc) {
271; CHECK-LABEL: @too_many_loads(
272; CHECK-NEXT:  entry:
273; CHECK-NEXT:    [[LD_A_0:%.*]] = load i16, ptr [[A:%.*]], align 2
274; CHECK-NEXT:    [[SEXT_A_0:%.*]] = sext i16 [[LD_A_0]] to i32
275; CHECK-NEXT:    [[LD_B_0:%.*]] = load i16, ptr [[B:%.*]], align 2
276; CHECK-NEXT:    [[SEXT_B_0:%.*]] = sext i16 [[LD_B_0]] to i32
277; CHECK-NEXT:    [[MUL_0:%.*]] = mul i32 [[SEXT_A_0]], [[SEXT_B_0]]
278; CHECK-NEXT:    [[ADDR_A_1:%.*]] = getelementptr i16, ptr [[A]], i32 1
279; CHECK-NEXT:    [[ADDR_B_1:%.*]] = getelementptr i16, ptr [[B]], i32 1
280; CHECK-NEXT:    [[LD_A_1:%.*]] = load i16, ptr [[ADDR_A_1]], align 2
281; CHECK-NEXT:    [[SEXT_A_1:%.*]] = sext i16 [[LD_A_1]] to i32
282; CHECK-NEXT:    [[LD_B_1:%.*]] = load i16, ptr [[ADDR_B_1]], align 2
283; CHECK-NEXT:    [[SEXT_B_1:%.*]] = sext i16 [[LD_B_1]] to i32
284; CHECK-NEXT:    [[MUL_1:%.*]] = mul i32 [[SEXT_A_1]], [[SEXT_B_1]]
285; CHECK-NEXT:    [[ADD_0:%.*]] = add i32 [[MUL_0]], [[MUL_1]]
286; CHECK-NEXT:    [[MUL_2:%.*]] = mul i32 [[SEXT_A_0]], [[SEXT_B_0]]
287; CHECK-NEXT:    [[ADDR_B_3:%.*]] = getelementptr i16, ptr [[B]], i32 3
288; CHECK-NEXT:    [[LD_B_3:%.*]] = load i16, ptr [[ADDR_B_3]], align 2
289; CHECK-NEXT:    [[SEXT_B_3:%.*]] = sext i16 [[LD_B_3]] to i32
290; CHECK-NEXT:    [[MUL_3:%.*]] = mul i32 [[SEXT_A_1]], [[SEXT_B_3]]
291; CHECK-NEXT:    [[ADD_3:%.*]] = add i32 [[MUL_2]], [[MUL_3]]
292; CHECK-NEXT:    [[ADDR_A_4:%.*]] = getelementptr i16, ptr [[A]], i32 4
293; CHECK-NEXT:    [[ADDR_B_4:%.*]] = getelementptr i16, ptr [[B]], i32 4
294; CHECK-NEXT:    [[LD_A_4:%.*]] = load i16, ptr [[ADDR_A_4]], align 2
295; CHECK-NEXT:    [[SEXT_A_4:%.*]] = sext i16 [[LD_A_4]] to i32
296; CHECK-NEXT:    [[LD_B_4:%.*]] = load i16, ptr [[ADDR_B_4]], align 2
297; CHECK-NEXT:    [[SEXT_B_4:%.*]] = sext i16 [[LD_B_4]] to i32
298; CHECK-NEXT:    [[MUL_4:%.*]] = mul i32 [[SEXT_A_4]], [[SEXT_B_4]]
299; CHECK-NEXT:    [[ADDR_A_5:%.*]] = getelementptr i16, ptr [[A]], i32 5
300; CHECK-NEXT:    [[ADDR_B_5:%.*]] = getelementptr i16, ptr [[B]], i32 5
301; CHECK-NEXT:    [[LD_A_5:%.*]] = load i16, ptr [[ADDR_A_5]], align 2
302; CHECK-NEXT:    [[SEXT_A_5:%.*]] = sext i16 [[LD_A_5]] to i32
303; CHECK-NEXT:    [[LD_B_5:%.*]] = load i16, ptr [[ADDR_B_5]], align 2
304; CHECK-NEXT:    [[SEXT_B_5:%.*]] = sext i16 [[LD_B_5]] to i32
305; CHECK-NEXT:    [[MUL_5:%.*]] = mul i32 [[SEXT_A_5]], [[SEXT_B_5]]
306; CHECK-NEXT:    [[ADD_5:%.*]] = add i32 [[MUL_4]], [[MUL_5]]
307; CHECK-NEXT:    [[ADDR_A_6:%.*]] = getelementptr i16, ptr [[A]], i32 6
308; CHECK-NEXT:    [[ADDR_B_6:%.*]] = getelementptr i16, ptr [[B]], i32 6
309; CHECK-NEXT:    [[LD_A_6:%.*]] = load i16, ptr [[ADDR_A_6]], align 2
310; CHECK-NEXT:    [[SEXT_A_6:%.*]] = sext i16 [[LD_A_6]] to i32
311; CHECK-NEXT:    [[LD_B_6:%.*]] = load i16, ptr [[ADDR_B_6]], align 2
312; CHECK-NEXT:    [[SEXT_B_6:%.*]] = sext i16 [[LD_B_6]] to i32
313; CHECK-NEXT:    [[MUL_6:%.*]] = mul i32 [[SEXT_A_6]], [[SEXT_B_6]]
314; CHECK-NEXT:    [[ADDR_A_7:%.*]] = getelementptr i16, ptr [[A]], i32 7
315; CHECK-NEXT:    [[ADDR_B_7:%.*]] = getelementptr i16, ptr [[B]], i32 7
316; CHECK-NEXT:    [[LD_A_7:%.*]] = load i16, ptr [[ADDR_A_7]], align 2
317; CHECK-NEXT:    [[SEXT_A_7:%.*]] = sext i16 [[LD_A_7]] to i32
318; CHECK-NEXT:    [[LD_B_7:%.*]] = load i16, ptr [[ADDR_B_7]], align 2
319; CHECK-NEXT:    [[SEXT_B_7:%.*]] = sext i16 [[LD_B_7]] to i32
320; CHECK-NEXT:    [[MUL_7:%.*]] = mul i32 [[SEXT_A_7]], [[SEXT_B_7]]
321; CHECK-NEXT:    [[ADD_7:%.*]] = add i32 [[MUL_6]], [[MUL_7]]
322; CHECK-NEXT:    [[ADDR_A_8:%.*]] = getelementptr i16, ptr [[A]], i32 7
323; CHECK-NEXT:    [[ADDR_B_8:%.*]] = getelementptr i16, ptr [[B]], i32 7
324; CHECK-NEXT:    [[LD_A_8:%.*]] = load i16, ptr [[ADDR_A_8]], align 2
325; CHECK-NEXT:    [[SEXT_A_8:%.*]] = sext i16 [[LD_A_8]] to i32
326; CHECK-NEXT:    [[LD_B_8:%.*]] = load i16, ptr [[ADDR_B_8]], align 2
327; CHECK-NEXT:    [[SEXT_B_8:%.*]] = sext i16 [[LD_B_8]] to i32
328; CHECK-NEXT:    [[MUL_8:%.*]] = mul i32 [[SEXT_A_8]], [[SEXT_B_8]]
329; CHECK-NEXT:    [[ADD_10:%.*]] = add i32 [[ADD_7]], [[ADD_5]]
330; CHECK-NEXT:    [[ADD_11:%.*]] = add i32 [[ADD_3]], [[ADD_0]]
331; CHECK-NEXT:    [[ADD_12:%.*]] = add i32 [[ADD_10]], [[ADD_11]]
332; CHECK-NEXT:    [[ADD_13:%.*]] = add i32 [[ADD_12]], [[ACC:%.*]]
333; CHECK-NEXT:    [[RES:%.*]] = add i32 [[ADD_13]], [[MUL_8]]
334; CHECK-NEXT:    ret i32 [[RES]]
335;
336entry:
  ; Elements 0 and 1: a[0]*b[0] + a[1]*b[1].
337  %ld.a.0 = load i16, ptr %a
338  %sext.a.0 = sext i16 %ld.a.0 to i32
339  %ld.b.0 = load i16, ptr %b
340  %sext.b.0 = sext i16 %ld.b.0 to i32
341  %mul.0 = mul i32 %sext.a.0, %sext.b.0
342  %addr.a.1 = getelementptr i16, ptr %a, i32 1
343  %addr.b.1 = getelementptr i16, ptr %b, i32 1
344  %ld.a.1 = load i16, ptr %addr.a.1
345  %sext.a.1 = sext i16 %ld.a.1 to i32
346  %ld.b.1 = load i16, ptr %addr.b.1
347  %sext.b.1 = sext i16 %ld.b.1 to i32
348  %mul.1 = mul i32 %sext.a.1, %sext.b.1
349  %add.0 = add i32 %mul.0, %mul.1
350
  ; Elements 2 and 3 are loaded, but the multiplies below read element 0/1
  ; values instead: %mul.2 = a[0]*b[0] and %mul.3 = a[1]*b[3].
351  %addr.a.2 = getelementptr i16, ptr %a, i32 2
352  %addr.b.2 = getelementptr i16, ptr %b, i32 2
353  %ld.a.2 = load i16, ptr %addr.a.2
354  %sext.a.2 = sext i16 %ld.a.2 to i32
355  %ld.b.2 = load i16, ptr %addr.b.2
356  %sext.b.2 = sext i16 %ld.b.2 to i32
357  %mul.2 = mul i32 %sext.a.0, %sext.b.0
358  %addr.a.3 = getelementptr i16, ptr %a, i32 3
359  %addr.b.3 = getelementptr i16, ptr %b, i32 3
360  %ld.a.3 = load i16, ptr %addr.a.3
361  %sext.a.3 = sext i16 %ld.a.3 to i32
362  %ld.b.3 = load i16, ptr %addr.b.3
363  %sext.b.3 = sext i16 %ld.b.3 to i32
364  %mul.3 = mul i32 %sext.a.1, %sext.b.3
365  %add.3 = add i32 %mul.2, %mul.3
366
  ; Elements 4 and 5: a[4]*b[4] + a[5]*b[5].
367  %addr.a.4 = getelementptr i16, ptr %a, i32 4
368  %addr.b.4 = getelementptr i16, ptr %b, i32 4
369  %ld.a.4 = load i16, ptr %addr.a.4
370  %sext.a.4 = sext i16 %ld.a.4 to i32
371  %ld.b.4 = load i16, ptr %addr.b.4
372  %sext.b.4 = sext i16 %ld.b.4 to i32
373  %mul.4 = mul i32 %sext.a.4, %sext.b.4
374  %addr.a.5 = getelementptr i16, ptr %a, i32 5
375  %addr.b.5 = getelementptr i16, ptr %b, i32 5
376  %ld.a.5 = load i16, ptr %addr.a.5
377  %sext.a.5 = sext i16 %ld.a.5 to i32
378  %ld.b.5 = load i16, ptr %addr.b.5
379  %sext.b.5 = sext i16 %ld.b.5 to i32
380  %mul.5 = mul i32 %sext.a.5, %sext.b.5
381  %add.5 = add i32 %mul.4, %mul.5
382
  ; Elements 6 and 7: a[6]*b[6] + a[7]*b[7].
383  %addr.a.6 = getelementptr i16, ptr %a, i32 6
384  %addr.b.6 = getelementptr i16, ptr %b, i32 6
385  %ld.a.6 = load i16, ptr %addr.a.6
386  %sext.a.6 = sext i16 %ld.a.6 to i32
387  %ld.b.6 = load i16, ptr %addr.b.6
388  %sext.b.6 = sext i16 %ld.b.6 to i32
389  %mul.6 = mul i32 %sext.a.6, %sext.b.6
390  %addr.a.7 = getelementptr i16, ptr %a, i32 7
391  %addr.b.7 = getelementptr i16, ptr %b, i32 7
392  %ld.a.7 = load i16, ptr %addr.a.7
393  %sext.a.7 = sext i16 %ld.a.7 to i32
394  %ld.b.7 = load i16, ptr %addr.b.7
395  %sext.b.7 = sext i16 %ld.b.7 to i32
396  %mul.7 = mul i32 %sext.a.7, %sext.b.7
397  %add.7 = add i32 %mul.6, %mul.7
398
  ; Ninth, unpaired product. Both addresses use index 7, so this reloads the
  ; same elements as the `.7` group above.
399  %addr.a.8 = getelementptr i16, ptr %a, i32 7
400  %addr.b.8 = getelementptr i16, ptr %b, i32 7
401  %ld.a.8 = load i16, ptr %addr.a.8
402  %sext.a.8 = sext i16 %ld.a.8 to i32
403  %ld.b.8 = load i16, ptr %addr.b.8
404  %sext.b.8 = sext i16 %ld.b.8 to i32
405  %mul.8 = mul i32 %sext.a.8, %sext.b.8
406
  ; Add tree combining the four pairwise sums, the accumulator and %mul.8.
407  %add.10 = add i32 %add.7, %add.5
408  %add.11 = add i32 %add.3, %add.0
409  %add.12 = add i32 %add.10, %add.11
410  %add.13 = add i32 %add.12, %acc
411  %res = add i32 %add.13, %mul.8
412  ret i32 %res
413}
414