xref: /llvm-project/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll (revision 7b3bbd83c0c24087072ec5b22a76799ab31f87d5)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -O3 -mtriple=armv8m.main-none-none-eabi -mattr=+dsp < %s | FileCheck %s --check-prefixes=CHECK-LE
3; RUN: llc -O3 -mtriple=armv8m.maineb-none-none-eabi -mattr=+dsp < %s | FileCheck %s --check-prefixes=CHECK-BE
4
; add_user: for each i in [0, %arg) the loop accumulates
;   arg2[i]*arg3[i] + arg2[i+1]*arg3[i+1]
; (i16 elements sign-extended to i32) into a running total. The sign-extended
; arg3[i] value has a second user: it is also added into a separate %count.
; The function returns total + count.
; The little-endian expectations below fetch each pair of halfwords with one
; 32-bit ldr, combine them with smlad, and use sxtah for the extra add. The
; big-endian expectations keep individual ldrsh loads and two smlabb.
5define i32 @add_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture readonly %arg2, ptr nocapture readonly %arg3) {
6; CHECK-LE-LABEL: add_user:
7; CHECK-LE:       @ %bb.0: @ %entry
8; CHECK-LE-NEXT:    cmp r0, #1
9; CHECK-LE-NEXT:    blt .LBB0_4
10; CHECK-LE-NEXT:  @ %bb.1: @ %for.body.preheader
11; CHECK-LE-NEXT:    .save {r4, lr}
12; CHECK-LE-NEXT:    push {r4, lr}
13; CHECK-LE-NEXT:    sub.w lr, r3, #2
14; CHECK-LE-NEXT:    subs r2, #2
15; CHECK-LE-NEXT:    mov.w r12, #0
16; CHECK-LE-NEXT:    movs r1, #0
17; CHECK-LE-NEXT:  .LBB0_2: @ %for.body
18; CHECK-LE-NEXT:    @ =>This Inner Loop Header: Depth=1
19; CHECK-LE-NEXT:    ldr r3, [lr, #2]!
20; CHECK-LE-NEXT:    subs r0, #1
21; CHECK-LE-NEXT:    ldr r4, [r2, #2]!
22; CHECK-LE-NEXT:    sxtah r1, r1, r3
23; CHECK-LE-NEXT:    smlad r12, r4, r3, r12
24; CHECK-LE-NEXT:    bne .LBB0_2
25; CHECK-LE-NEXT:  @ %bb.3:
26; CHECK-LE-NEXT:    pop.w {r4, lr}
27; CHECK-LE-NEXT:    add.w r0, r12, r1
28; CHECK-LE-NEXT:    bx lr
29; CHECK-LE-NEXT:  .LBB0_4:
30; CHECK-LE-NEXT:    mov.w r12, #0
31; CHECK-LE-NEXT:    movs r1, #0
32; CHECK-LE-NEXT:    add.w r0, r12, r1
33; CHECK-LE-NEXT:    bx lr
34;
35; CHECK-BE-LABEL: add_user:
36; CHECK-BE:       @ %bb.0: @ %entry
37; CHECK-BE-NEXT:    cmp r0, #1
38; CHECK-BE-NEXT:    blt .LBB0_4
39; CHECK-BE-NEXT:  @ %bb.1: @ %for.body.preheader
40; CHECK-BE-NEXT:    .save {r4, r5, r7, lr}
41; CHECK-BE-NEXT:    push {r4, r5, r7, lr}
42; CHECK-BE-NEXT:    subs r3, #2
43; CHECK-BE-NEXT:    subs r2, #2
44; CHECK-BE-NEXT:    mov.w r12, #0
45; CHECK-BE-NEXT:    movs r1, #0
46; CHECK-BE-NEXT:  .LBB0_2: @ %for.body
47; CHECK-BE-NEXT:    @ =>This Inner Loop Header: Depth=1
48; CHECK-BE-NEXT:    ldrsh lr, [r3, #2]!
49; CHECK-BE-NEXT:    subs r0, #1
50; CHECK-BE-NEXT:    ldrsh r4, [r2, #2]!
51; CHECK-BE-NEXT:    add r1, lr
52; CHECK-BE-NEXT:    ldrsh.w r5, [r2, #2]
53; CHECK-BE-NEXT:    smlabb r12, r4, lr, r12
54; CHECK-BE-NEXT:    ldrsh.w r4, [r3, #2]
55; CHECK-BE-NEXT:    smlabb r12, r5, r4, r12
56; CHECK-BE-NEXT:    bne .LBB0_2
57; CHECK-BE-NEXT:  @ %bb.3:
58; CHECK-BE-NEXT:    pop.w {r4, r5, r7, lr}
59; CHECK-BE-NEXT:    add.w r0, r12, r1
60; CHECK-BE-NEXT:    bx lr
61; CHECK-BE-NEXT:  .LBB0_4:
62; CHECK-BE-NEXT:    mov.w r12, #0
63; CHECK-BE-NEXT:    movs r1, #0
64; CHECK-BE-NEXT:    add.w r0, r12, r1
65; CHECK-BE-NEXT:    bx lr
66entry:
  ; Skip the loop entirely when the trip count %arg is not positive.
67  %cmp24 = icmp sgt i32 %arg, 0
68  br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup
69
70for.body.preheader:
  ; Neither of these two loaded values is used anywhere else in this function.
71  %.pre = load i16, ptr %arg3, align 2
72  %.pre27 = load i16, ptr %arg2, align 2
73  br label %for.body
74
75for.cond.cleanup:
  ; Both results are 0 when the loop was skipped; otherwise they carry the
  ; values from the last loop iteration. The return value is their sum.
76  %mac1.0.lcssa = phi i32 [ 0, %entry ], [ %add11, %for.body ]
77  %count.final = phi i32 [ 0, %entry ], [ %count.next, %for.body ]
78  %res = add i32 %mac1.0.lcssa, %count.final
79  ret i32 %res
80
81for.body:
  ; Loop-carried state: the multiply-accumulate total, the index i, and the
  ; side counter; all start at 0.
82  %mac1.026 = phi i32 [ %add11, %for.body ], [ 0, %for.body.preheader ]
83  %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
84  %count = phi i32 [ %count.next, %for.body ], [ 0, %for.body.preheader ]
  ; %0 = arg3[i], %1 = arg3[i+1], %2 = arg2[i]
85  %arrayidx = getelementptr inbounds i16, ptr %arg3, i32 %i.025
86  %0 = load i16, ptr %arrayidx, align 2
87  %add = add nuw nsw i32 %i.025, 1
88  %arrayidx1 = getelementptr inbounds i16, ptr %arg3, i32 %add
89  %1 = load i16, ptr %arrayidx1, align 2
90  %arrayidx3 = getelementptr inbounds i16, ptr %arg2, i32 %i.025
91  %2 = load i16, ptr %arrayidx3, align 2
92  %conv = sext i16 %2 to i32
93  %conv4 = sext i16 %0 to i32
  ; Extra, non-multiply user of the sign-extended arg3[i] value.
94  %count.next = add i32 %conv4, %count
95  %mul = mul nsw i32 %conv, %conv4
  ; %3 = arg2[i+1]
96  %arrayidx6 = getelementptr inbounds i16, ptr %arg2, i32 %add
97  %3 = load i16, ptr %arrayidx6, align 2
98  %conv7 = sext i16 %3 to i32
99  %conv8 = sext i16 %1 to i32
100  %mul9 = mul nsw i32 %conv7, %conv8
  ; total += arg2[i]*arg3[i] + arg2[i+1]*arg3[i+1]
101  %add10 = add i32 %mul, %mac1.026
102  %add11 = add i32 %mul9, %add10
  ; Continue until the incremented index reaches %arg.
103  %exitcond = icmp ne i32 %add, %arg
104  br i1 %exitcond, label %for.body, label %for.cond.cleanup
105}
106
; mul_bottom_user: same multiply-accumulate loop over adjacent i16 pairs of
; %arg2 and %arg3 as the other functions in this file, but here the
; sign-extended arg3[i] value is additionally multiplied into a separate
; %count. The function returns total + count.
; The little-endian expectations below still use 32-bit ldr plus smlad, and
; recover the extra operand from the loaded word with sxth before the mul.
; The big-endian expectations use ldrsh loads, two smlabb and a mul.
107define i32 @mul_bottom_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture readonly %arg2, ptr nocapture readonly %arg3) {
108; CHECK-LE-LABEL: mul_bottom_user:
109; CHECK-LE:       @ %bb.0: @ %entry
110; CHECK-LE-NEXT:    cmp r0, #1
111; CHECK-LE-NEXT:    blt .LBB1_4
112; CHECK-LE-NEXT:  @ %bb.1: @ %for.body.preheader
113; CHECK-LE-NEXT:    .save {r4, lr}
114; CHECK-LE-NEXT:    push {r4, lr}
115; CHECK-LE-NEXT:    sub.w lr, r3, #2
116; CHECK-LE-NEXT:    subs r2, #2
117; CHECK-LE-NEXT:    mov.w r12, #0
118; CHECK-LE-NEXT:    movs r1, #0
119; CHECK-LE-NEXT:  .LBB1_2: @ %for.body
120; CHECK-LE-NEXT:    @ =>This Inner Loop Header: Depth=1
121; CHECK-LE-NEXT:    ldr r3, [lr, #2]!
122; CHECK-LE-NEXT:    subs r0, #1
123; CHECK-LE-NEXT:    ldr r4, [r2, #2]!
124; CHECK-LE-NEXT:    smlad r12, r4, r3, r12
125; CHECK-LE-NEXT:    sxth r3, r3
126; CHECK-LE-NEXT:    mul r1, r3, r1
127; CHECK-LE-NEXT:    bne .LBB1_2
128; CHECK-LE-NEXT:  @ %bb.3:
129; CHECK-LE-NEXT:    pop.w {r4, lr}
130; CHECK-LE-NEXT:    add.w r0, r12, r1
131; CHECK-LE-NEXT:    bx lr
132; CHECK-LE-NEXT:  .LBB1_4:
133; CHECK-LE-NEXT:    mov.w r12, #0
134; CHECK-LE-NEXT:    movs r1, #0
135; CHECK-LE-NEXT:    add.w r0, r12, r1
136; CHECK-LE-NEXT:    bx lr
137;
138; CHECK-BE-LABEL: mul_bottom_user:
139; CHECK-BE:       @ %bb.0: @ %entry
140; CHECK-BE-NEXT:    cmp r0, #1
141; CHECK-BE-NEXT:    blt .LBB1_4
142; CHECK-BE-NEXT:  @ %bb.1: @ %for.body.preheader
143; CHECK-BE-NEXT:    .save {r4, r5, r7, lr}
144; CHECK-BE-NEXT:    push {r4, r5, r7, lr}
145; CHECK-BE-NEXT:    subs r3, #2
146; CHECK-BE-NEXT:    subs r2, #2
147; CHECK-BE-NEXT:    mov.w r12, #0
148; CHECK-BE-NEXT:    movs r1, #0
149; CHECK-BE-NEXT:  .LBB1_2: @ %for.body
150; CHECK-BE-NEXT:    @ =>This Inner Loop Header: Depth=1
151; CHECK-BE-NEXT:    ldrsh lr, [r3, #2]!
152; CHECK-BE-NEXT:    subs r0, #1
153; CHECK-BE-NEXT:    ldrsh r4, [r2, #2]!
154; CHECK-BE-NEXT:    ldrsh.w r5, [r2, #2]
155; CHECK-BE-NEXT:    mul r1, lr, r1
156; CHECK-BE-NEXT:    smlabb r12, r4, lr, r12
157; CHECK-BE-NEXT:    ldrsh.w r4, [r3, #2]
158; CHECK-BE-NEXT:    smlabb r12, r5, r4, r12
159; CHECK-BE-NEXT:    bne .LBB1_2
160; CHECK-BE-NEXT:  @ %bb.3:
161; CHECK-BE-NEXT:    pop.w {r4, r5, r7, lr}
162; CHECK-BE-NEXT:    add.w r0, r12, r1
163; CHECK-BE-NEXT:    bx lr
164; CHECK-BE-NEXT:  .LBB1_4:
165; CHECK-BE-NEXT:    mov.w r12, #0
166; CHECK-BE-NEXT:    movs r1, #0
167; CHECK-BE-NEXT:    add.w r0, r12, r1
168; CHECK-BE-NEXT:    bx lr
169entry:
  ; Skip the loop entirely when the trip count %arg is not positive.
170  %cmp24 = icmp sgt i32 %arg, 0
171  br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup
172
173for.body.preheader:
  ; Neither of these two loaded values is used anywhere else in this function.
174  %.pre = load i16, ptr %arg3, align 2
175  %.pre27 = load i16, ptr %arg2, align 2
176  br label %for.body
177
178for.cond.cleanup:
  ; Both results are 0 when the loop was skipped; otherwise they carry the
  ; values from the last loop iteration. The return value is their sum.
179  %mac1.0.lcssa = phi i32 [ 0, %entry ], [ %add11, %for.body ]
180  %count.final = phi i32 [ 0, %entry ], [ %count.next, %for.body ]
181  %res = add i32 %mac1.0.lcssa, %count.final
182  ret i32 %res
183
184for.body:
  ; Loop-carried state: the multiply-accumulate total, the index i, and the
  ; side counter; all start at 0.
185  %mac1.026 = phi i32 [ %add11, %for.body ], [ 0, %for.body.preheader ]
186  %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
187  %count = phi i32 [ %count.next, %for.body ], [ 0, %for.body.preheader ]
  ; %0 = arg3[i], %1 = arg3[i+1], %2 = arg2[i]
188  %arrayidx = getelementptr inbounds i16, ptr %arg3, i32 %i.025
189  %0 = load i16, ptr %arrayidx, align 2
190  %add = add nuw nsw i32 %i.025, 1
191  %arrayidx1 = getelementptr inbounds i16, ptr %arg3, i32 %add
192  %1 = load i16, ptr %arrayidx1, align 2
193  %arrayidx3 = getelementptr inbounds i16, ptr %arg2, i32 %i.025
194  %2 = load i16, ptr %arrayidx3, align 2
195  %conv = sext i16 %2 to i32
196  %conv4 = sext i16 %0 to i32
197  %mul = mul nsw i32 %conv, %conv4
  ; %3 = arg2[i+1]
198  %arrayidx6 = getelementptr inbounds i16, ptr %arg2, i32 %add
199  %3 = load i16, ptr %arrayidx6, align 2
200  %conv7 = sext i16 %3 to i32
201  %conv8 = sext i16 %1 to i32
202  %mul9 = mul nsw i32 %conv7, %conv8
  ; total += arg2[i]*arg3[i] + arg2[i+1]*arg3[i+1]
203  %add10 = add i32 %mul, %mac1.026
204  %add11 = add i32 %mul9, %add10
  ; Extra multiply whose operand is the sign-extended arg3[i] value.
205  %count.next = mul i32 %conv4, %count
  ; Continue until the incremented index reaches %arg.
206  %exitcond = icmp ne i32 %add, %arg
207  br i1 %exitcond, label %for.body, label %for.cond.cleanup
208}
209
; mul_top_user: same multiply-accumulate loop over adjacent i16 pairs of
; %arg2 and %arg3 as the other functions in this file, but here the
; sign-extended arg2[i+1] value is additionally multiplied into a separate
; %count. The function returns total + count.
; The little-endian expectations below still use 32-bit ldr plus smlad, and
; recover the extra operand from the loaded %arg2 word with asr #16 before
; the mul. The big-endian expectations use ldrsh loads, two smlabb and a mul.
210define i32 @mul_top_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture readonly %arg2, ptr nocapture readonly %arg3) {
211; CHECK-LE-LABEL: mul_top_user:
212; CHECK-LE:       @ %bb.0: @ %entry
213; CHECK-LE-NEXT:    cmp r0, #1
214; CHECK-LE-NEXT:    blt .LBB2_4
215; CHECK-LE-NEXT:  @ %bb.1: @ %for.body.preheader
216; CHECK-LE-NEXT:    .save {r4, lr}
217; CHECK-LE-NEXT:    push {r4, lr}
218; CHECK-LE-NEXT:    subs r3, #2
219; CHECK-LE-NEXT:    subs r2, #2
220; CHECK-LE-NEXT:    mov.w r12, #0
221; CHECK-LE-NEXT:    movs r1, #0
222; CHECK-LE-NEXT:  .LBB2_2: @ %for.body
223; CHECK-LE-NEXT:    @ =>This Inner Loop Header: Depth=1
224; CHECK-LE-NEXT:    ldr lr, [r3, #2]!
225; CHECK-LE-NEXT:    subs r0, #1
226; CHECK-LE-NEXT:    ldr r4, [r2, #2]!
227; CHECK-LE-NEXT:    smlad r12, r4, lr, r12
228; CHECK-LE-NEXT:    asr.w r4, r4, #16
229; CHECK-LE-NEXT:    mul r1, r4, r1
230; CHECK-LE-NEXT:    bne .LBB2_2
231; CHECK-LE-NEXT:  @ %bb.3:
232; CHECK-LE-NEXT:    pop.w {r4, lr}
233; CHECK-LE-NEXT:    add.w r0, r12, r1
234; CHECK-LE-NEXT:    bx lr
235; CHECK-LE-NEXT:  .LBB2_4:
236; CHECK-LE-NEXT:    mov.w r12, #0
237; CHECK-LE-NEXT:    movs r1, #0
238; CHECK-LE-NEXT:    add.w r0, r12, r1
239; CHECK-LE-NEXT:    bx lr
240;
241; CHECK-BE-LABEL: mul_top_user:
242; CHECK-BE:       @ %bb.0: @ %entry
243; CHECK-BE-NEXT:    cmp r0, #1
244; CHECK-BE-NEXT:    blt .LBB2_4
245; CHECK-BE-NEXT:  @ %bb.1: @ %for.body.preheader
246; CHECK-BE-NEXT:    .save {r4, lr}
247; CHECK-BE-NEXT:    push {r4, lr}
248; CHECK-BE-NEXT:    subs r3, #2
249; CHECK-BE-NEXT:    subs r2, #2
250; CHECK-BE-NEXT:    mov.w r12, #0
251; CHECK-BE-NEXT:    movs r1, #0
252; CHECK-BE-NEXT:  .LBB2_2: @ %for.body
253; CHECK-BE-NEXT:    @ =>This Inner Loop Header: Depth=1
254; CHECK-BE-NEXT:    ldrsh lr, [r3, #2]!
255; CHECK-BE-NEXT:    subs r0, #1
256; CHECK-BE-NEXT:    ldrsh r4, [r2, #2]!
257; CHECK-BE-NEXT:    smlabb r12, r4, lr, r12
258; CHECK-BE-NEXT:    ldrsh.w r4, [r2, #2]
259; CHECK-BE-NEXT:    ldrsh.w lr, [r3, #2]
260; CHECK-BE-NEXT:    mul r1, r4, r1
261; CHECK-BE-NEXT:    smlabb r12, r4, lr, r12
262; CHECK-BE-NEXT:    bne .LBB2_2
263; CHECK-BE-NEXT:  @ %bb.3:
264; CHECK-BE-NEXT:    pop.w {r4, lr}
265; CHECK-BE-NEXT:    add.w r0, r12, r1
266; CHECK-BE-NEXT:    bx lr
267; CHECK-BE-NEXT:  .LBB2_4:
268; CHECK-BE-NEXT:    mov.w r12, #0
269; CHECK-BE-NEXT:    movs r1, #0
270; CHECK-BE-NEXT:    add.w r0, r12, r1
271; CHECK-BE-NEXT:    bx lr
272entry:
  ; Skip the loop entirely when the trip count %arg is not positive.
273  %cmp24 = icmp sgt i32 %arg, 0
274  br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup
275
276for.body.preheader:
  ; Neither of these two loaded values is used anywhere else in this function.
277  %.pre = load i16, ptr %arg3, align 2
278  %.pre27 = load i16, ptr %arg2, align 2
279  br label %for.body
280
281for.cond.cleanup:
  ; Both results are 0 when the loop was skipped; otherwise they carry the
  ; values from the last loop iteration. The return value is their sum.
282  %mac1.0.lcssa = phi i32 [ 0, %entry ], [ %add11, %for.body ]
283  %count.final = phi i32 [ 0, %entry ], [ %count.next, %for.body ]
284  %res = add i32 %mac1.0.lcssa, %count.final
285  ret i32 %res
286
287for.body:
  ; Loop-carried state: the multiply-accumulate total, the index i, and the
  ; side counter; all start at 0.
288  %mac1.026 = phi i32 [ %add11, %for.body ], [ 0, %for.body.preheader ]
289  %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
290  %count = phi i32 [ %count.next, %for.body ], [ 0, %for.body.preheader ]
  ; %0 = arg3[i], %1 = arg3[i+1], %2 = arg2[i]
291  %arrayidx = getelementptr inbounds i16, ptr %arg3, i32 %i.025
292  %0 = load i16, ptr %arrayidx, align 2
293  %add = add nuw nsw i32 %i.025, 1
294  %arrayidx1 = getelementptr inbounds i16, ptr %arg3, i32 %add
295  %1 = load i16, ptr %arrayidx1, align 2
296  %arrayidx3 = getelementptr inbounds i16, ptr %arg2, i32 %i.025
297  %2 = load i16, ptr %arrayidx3, align 2
298  %conv = sext i16 %2 to i32
299  %conv4 = sext i16 %0 to i32
300  %mul = mul nsw i32 %conv, %conv4
  ; %3 = arg2[i+1]
301  %arrayidx6 = getelementptr inbounds i16, ptr %arg2, i32 %add
302  %3 = load i16, ptr %arrayidx6, align 2
303  %conv7 = sext i16 %3 to i32
304  %conv8 = sext i16 %1 to i32
305  %mul9 = mul nsw i32 %conv7, %conv8
  ; total += arg2[i]*arg3[i] + arg2[i+1]*arg3[i+1]
306  %add10 = add i32 %mul, %mac1.026
307  %add11 = add i32 %mul9, %add10
  ; Extra multiply whose operand is the sign-extended arg2[i+1] value.
308  %count.next = mul i32 %conv7, %count
  ; Continue until the incremented index reaches %arg.
309  %exitcond = icmp ne i32 %add, %arg
310  br i1 %exitcond, label %for.body, label %for.cond.cleanup
311}
312
; and_user: same multiply-accumulate loop over adjacent i16 pairs of %arg2
; and %arg3 as the other functions in this file, but here the sign-extended
; arg3[i] value is additionally masked with 65535 and that masked value is
; multiplied into a separate %count. The function returns total + count.
; The little-endian expectations below still use 32-bit ldr plus smlad, and
; produce the masked operand from the loaded word with uxth. The big-endian
; expectations load arg3[i] with a zero-extending ldrh that feeds both the
; mul and the first smlabb.
313define i32 @and_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture readonly %arg2, ptr nocapture readonly %arg3) {
314; CHECK-LE-LABEL: and_user:
315; CHECK-LE:       @ %bb.0: @ %entry
316; CHECK-LE-NEXT:    cmp r0, #1
317; CHECK-LE-NEXT:    blt .LBB3_4
318; CHECK-LE-NEXT:  @ %bb.1: @ %for.body.preheader
319; CHECK-LE-NEXT:    .save {r4, lr}
320; CHECK-LE-NEXT:    push {r4, lr}
321; CHECK-LE-NEXT:    sub.w lr, r3, #2
322; CHECK-LE-NEXT:    subs r2, #2
323; CHECK-LE-NEXT:    mov.w r12, #0
324; CHECK-LE-NEXT:    movs r1, #0
325; CHECK-LE-NEXT:  .LBB3_2: @ %for.body
326; CHECK-LE-NEXT:    @ =>This Inner Loop Header: Depth=1
327; CHECK-LE-NEXT:    ldr r3, [lr, #2]!
328; CHECK-LE-NEXT:    subs r0, #1
329; CHECK-LE-NEXT:    ldr r4, [r2, #2]!
330; CHECK-LE-NEXT:    smlad r12, r4, r3, r12
331; CHECK-LE-NEXT:    uxth r3, r3
332; CHECK-LE-NEXT:    mul r1, r3, r1
333; CHECK-LE-NEXT:    bne .LBB3_2
334; CHECK-LE-NEXT:  @ %bb.3:
335; CHECK-LE-NEXT:    pop.w {r4, lr}
336; CHECK-LE-NEXT:    add.w r0, r12, r1
337; CHECK-LE-NEXT:    bx lr
338; CHECK-LE-NEXT:  .LBB3_4:
339; CHECK-LE-NEXT:    mov.w r12, #0
340; CHECK-LE-NEXT:    movs r1, #0
341; CHECK-LE-NEXT:    add.w r0, r12, r1
342; CHECK-LE-NEXT:    bx lr
343;
344; CHECK-BE-LABEL: and_user:
345; CHECK-BE:       @ %bb.0: @ %entry
346; CHECK-BE-NEXT:    cmp r0, #1
347; CHECK-BE-NEXT:    blt .LBB3_4
348; CHECK-BE-NEXT:  @ %bb.1: @ %for.body.preheader
349; CHECK-BE-NEXT:    .save {r4, r5, r7, lr}
350; CHECK-BE-NEXT:    push {r4, r5, r7, lr}
351; CHECK-BE-NEXT:    subs r3, #2
352; CHECK-BE-NEXT:    subs r2, #2
353; CHECK-BE-NEXT:    mov.w r12, #0
354; CHECK-BE-NEXT:    movs r1, #0
355; CHECK-BE-NEXT:  .LBB3_2: @ %for.body
356; CHECK-BE-NEXT:    @ =>This Inner Loop Header: Depth=1
357; CHECK-BE-NEXT:    ldrh lr, [r3, #2]!
358; CHECK-BE-NEXT:    subs r0, #1
359; CHECK-BE-NEXT:    ldrsh r4, [r2, #2]!
360; CHECK-BE-NEXT:    ldrsh.w r5, [r2, #2]
361; CHECK-BE-NEXT:    mul r1, lr, r1
362; CHECK-BE-NEXT:    smlabb r12, r4, lr, r12
363; CHECK-BE-NEXT:    ldrsh.w r4, [r3, #2]
364; CHECK-BE-NEXT:    smlabb r12, r5, r4, r12
365; CHECK-BE-NEXT:    bne .LBB3_2
366; CHECK-BE-NEXT:  @ %bb.3:
367; CHECK-BE-NEXT:    pop.w {r4, r5, r7, lr}
368; CHECK-BE-NEXT:    add.w r0, r12, r1
369; CHECK-BE-NEXT:    bx lr
370; CHECK-BE-NEXT:  .LBB3_4:
371; CHECK-BE-NEXT:    mov.w r12, #0
372; CHECK-BE-NEXT:    movs r1, #0
373; CHECK-BE-NEXT:    add.w r0, r12, r1
374; CHECK-BE-NEXT:    bx lr
375entry:
  ; Skip the loop entirely when the trip count %arg is not positive.
376  %cmp24 = icmp sgt i32 %arg, 0
377  br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup
378
379for.body.preheader:
  ; Neither of these two loaded values is used anywhere else in this function.
380  %.pre = load i16, ptr %arg3, align 2
381  %.pre27 = load i16, ptr %arg2, align 2
382  br label %for.body
383
384for.cond.cleanup:
  ; Both results are 0 when the loop was skipped; otherwise they carry the
  ; values from the last loop iteration. The return value is their sum.
385  %mac1.0.lcssa = phi i32 [ 0, %entry ], [ %add11, %for.body ]
386  %count.final = phi i32 [ 0, %entry ], [ %count.next, %for.body ]
387  %res = add i32 %mac1.0.lcssa, %count.final
388  ret i32 %res
389
390for.body:
  ; Loop-carried state: the multiply-accumulate total, the index i, and the
  ; side counter; all start at 0.
391  %mac1.026 = phi i32 [ %add11, %for.body ], [ 0, %for.body.preheader ]
392  %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
393  %count = phi i32 [ %count.next, %for.body ], [ 0, %for.body.preheader ]
  ; %0 = arg3[i]; the remaining addresses are computed before their loads.
394  %arrayidx = getelementptr inbounds i16, ptr %arg3, i32 %i.025
395  %0 = load i16, ptr %arrayidx, align 2
396  %add = add nuw nsw i32 %i.025, 1
397  %arrayidx1 = getelementptr inbounds i16, ptr %arg3, i32 %add
398  %arrayidx3 = getelementptr inbounds i16, ptr %arg2, i32 %i.025
399  %arrayidx6 = getelementptr inbounds i16, ptr %arg2, i32 %add
  ; %1 = arg3[i+1], %2 = arg2[i]
400  %1 = load i16, ptr %arrayidx1, align 2
401  %2 = load i16, ptr %arrayidx3, align 2
402  %conv = sext i16 %2 to i32
403  %conv4 = sext i16 %0 to i32
  ; Extra user: the low 16 bits of the sign-extended arg3[i] value.
404  %bottom = and i32 %conv4, 65535
405  %mul = mul nsw i32 %conv, %conv4
  ; %3 = arg2[i+1]
406  %3 = load i16, ptr %arrayidx6, align 2
407  %conv7 = sext i16 %3 to i32
408  %conv8 = sext i16 %1 to i32
409  %mul9 = mul nsw i32 %conv7, %conv8
  ; total += arg2[i]*arg3[i] + arg2[i+1]*arg3[i+1]
410  %add10 = add i32 %mul, %mac1.026
411  %add11 = add i32 %mul9, %add10
412  %count.next = mul i32 %bottom, %count
  ; Continue until the incremented index reaches %arg.
413  %exitcond = icmp ne i32 %add, %arg
414  br i1 %exitcond, label %for.body, label %for.cond.cleanup
415}
416
; multi_uses: same multiply-accumulate loop over adjacent i16 pairs of %arg2
; and %arg3 as the other functions in this file, but here the sign-extended
; arg3[i] value has several extra users: it is masked with 65535, shifted
; left by 16, and the two results feed an xor with %count and a mul that
; produces the next %count. The function returns total + count.
; The little-endian expectations below still use 32-bit ldr plus smlad,
; followed by an eor / mul / lsl #16 sequence. The big-endian expectations
; use ldrsh loads and two smlabb, followed by the same eor / mul / lsl #16
; sequence.
417define i32 @multi_uses(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture readonly %arg2, ptr nocapture readonly %arg3) {
418; CHECK-LE-LABEL: multi_uses:
419; CHECK-LE:       @ %bb.0: @ %entry
420; CHECK-LE-NEXT:    .save {r4, lr}
421; CHECK-LE-NEXT:    push {r4, lr}
422; CHECK-LE-NEXT:    cmp r0, #1
423; CHECK-LE-NEXT:    blt .LBB4_4
424; CHECK-LE-NEXT:  @ %bb.1: @ %for.body.preheader
425; CHECK-LE-NEXT:    subs r3, #2
426; CHECK-LE-NEXT:    subs r2, #2
427; CHECK-LE-NEXT:    mov.w lr, #0
428; CHECK-LE-NEXT:    mov.w r12, #0
429; CHECK-LE-NEXT:  .LBB4_2: @ %for.body
430; CHECK-LE-NEXT:    @ =>This Inner Loop Header: Depth=1
431; CHECK-LE-NEXT:    ldr r1, [r3, #2]!
432; CHECK-LE-NEXT:    subs r0, #1
433; CHECK-LE-NEXT:    ldr r4, [r2, #2]!
434; CHECK-LE-NEXT:    smlad lr, r4, r1, lr
435; CHECK-LE-NEXT:    eor.w r4, r1, r12
436; CHECK-LE-NEXT:    mul r1, r4, r1
437; CHECK-LE-NEXT:    lsl.w r12, r1, #16
438; CHECK-LE-NEXT:    bne .LBB4_2
439; CHECK-LE-NEXT:  @ %bb.3: @ %for.cond.cleanup
440; CHECK-LE-NEXT:    add.w r0, lr, r12
441; CHECK-LE-NEXT:    pop {r4, pc}
442; CHECK-LE-NEXT:  .LBB4_4:
443; CHECK-LE-NEXT:    mov.w lr, #0
444; CHECK-LE-NEXT:    mov.w r12, #0
445; CHECK-LE-NEXT:    add.w r0, lr, r12
446; CHECK-LE-NEXT:    pop {r4, pc}
447;
448; CHECK-BE-LABEL: multi_uses:
449; CHECK-BE:       @ %bb.0: @ %entry
450; CHECK-BE-NEXT:    .save {r4, r5, r7, lr}
451; CHECK-BE-NEXT:    push {r4, r5, r7, lr}
452; CHECK-BE-NEXT:    cmp r0, #1
453; CHECK-BE-NEXT:    blt .LBB4_4
454; CHECK-BE-NEXT:  @ %bb.1: @ %for.body.preheader
455; CHECK-BE-NEXT:    subs r3, #2
456; CHECK-BE-NEXT:    subs r2, #2
457; CHECK-BE-NEXT:    mov.w r12, #0
458; CHECK-BE-NEXT:    mov.w lr, #0
459; CHECK-BE-NEXT:  .LBB4_2: @ %for.body
460; CHECK-BE-NEXT:    @ =>This Inner Loop Header: Depth=1
461; CHECK-BE-NEXT:    ldrsh r4, [r2, #2]!
462; CHECK-BE-NEXT:    subs r0, #1
463; CHECK-BE-NEXT:    ldrsh r1, [r3, #2]!
464; CHECK-BE-NEXT:    ldrsh.w r5, [r2, #2]
465; CHECK-BE-NEXT:    smlabb r12, r4, r1, r12
466; CHECK-BE-NEXT:    ldrsh.w r4, [r3, #2]
467; CHECK-BE-NEXT:    smlabb r12, r5, r4, r12
468; CHECK-BE-NEXT:    eor.w r5, r1, lr
469; CHECK-BE-NEXT:    mul r1, r5, r1
470; CHECK-BE-NEXT:    lsl.w lr, r1, #16
471; CHECK-BE-NEXT:    bne .LBB4_2
472; CHECK-BE-NEXT:  @ %bb.3: @ %for.cond.cleanup
473; CHECK-BE-NEXT:    add.w r0, r12, lr
474; CHECK-BE-NEXT:    pop {r4, r5, r7, pc}
475; CHECK-BE-NEXT:  .LBB4_4:
476; CHECK-BE-NEXT:    mov.w r12, #0
477; CHECK-BE-NEXT:    mov.w lr, #0
478; CHECK-BE-NEXT:    add.w r0, r12, lr
479; CHECK-BE-NEXT:    pop {r4, r5, r7, pc}
480entry:
  ; Skip the loop entirely when the trip count %arg is not positive.
481  %cmp24 = icmp sgt i32 %arg, 0
482  br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup
483
484for.body.preheader:
  ; Neither of these two loaded values is used anywhere else in this function.
485  %.pre = load i16, ptr %arg3, align 2
486  %.pre27 = load i16, ptr %arg2, align 2
487  br label %for.body
488
489for.cond.cleanup:
  ; Both results are 0 when the loop was skipped; otherwise they carry the
  ; values from the last loop iteration. The return value is their sum.
490  %mac1.0.lcssa = phi i32 [ 0, %entry ], [ %add11, %for.body ]
491  %count.final = phi i32 [ 0, %entry ], [ %count.next, %for.body ]
492  %res = add i32 %mac1.0.lcssa, %count.final
493  ret i32 %res
494
495for.body:
  ; Loop-carried state: the multiply-accumulate total, the index i, and the
  ; side counter; all start at 0.
496  %mac1.026 = phi i32 [ %add11, %for.body ], [ 0, %for.body.preheader ]
497  %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
498  %count = phi i32 [ %count.next, %for.body ], [ 0, %for.body.preheader ]
  ; %0 = arg3[i]; the remaining addresses are computed before their loads.
499  %arrayidx = getelementptr inbounds i16, ptr %arg3, i32 %i.025
500  %0 = load i16, ptr %arrayidx, align 2
501  %add = add nuw nsw i32 %i.025, 1
502  %arrayidx1 = getelementptr inbounds i16, ptr %arg3, i32 %add
503  %arrayidx3 = getelementptr inbounds i16, ptr %arg2, i32 %i.025
504  %arrayidx6 = getelementptr inbounds i16, ptr %arg2, i32 %add
  ; %1 = arg3[i+1], %2 = arg2[i]
505  %1 = load i16, ptr %arrayidx1, align 2
506  %2 = load i16, ptr %arrayidx3, align 2
507  %conv = sext i16 %2 to i32
508  %conv4 = sext i16 %0 to i32
  ; First extra user: the low 16 bits of the sign-extended arg3[i] value.
509  %bottom = and i32 %conv4, 65535
510  %mul = mul nsw i32 %conv, %conv4
  ; %3 = arg2[i+1]
511  %3 = load i16, ptr %arrayidx6, align 2
512  %conv7 = sext i16 %3 to i32
513  %conv8 = sext i16 %1 to i32
514  %mul9 = mul nsw i32 %conv7, %conv8
  ; total += arg2[i]*arg3[i] + arg2[i+1]*arg3[i+1]
515  %add10 = add i32 %mul, %mac1.026
  ; Second extra user: the same value shifted into the upper 16 bits.
516  %shl = shl i32 %conv4, 16
517  %add11 = add i32 %mul9, %add10
  ; count = (bottom ^ count) * shl
518  %xor = xor i32 %bottom, %count
519  %count.next = mul i32 %xor, %shl
  ; Continue until the incremented index reaches %arg.
520  %exitcond = icmp ne i32 %add, %arg
521  br i1 %exitcond, label %for.body, label %for.cond.cleanup
522}
523