xref: /llvm-project/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll (revision 9bd7b149c2f577086a716ccd0363d057caa3d98a)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -O3 -mtriple=armv8m.main-none-none-eabi -mattr=+dsp < %s | FileCheck %s --check-prefixes=CHECK-LE
3; RUN: llc -O3 -mtriple=armv8m.maineb-none-none-eabi -mattr=+dsp < %s | FileCheck %s --check-prefixes=CHECK-BE
4
; add_user: multiply-accumulate loop in which one of the sign-extended i16
; loads has a second user, here a plain `add` into a separate accumulator.
;
; Each iteration loads arg3[i], arg3[i+1], arg2[i] and arg2[i+1] as i16,
; sign-extends them, and accumulates arg2[i]*arg3[i] + arg2[i+1]*arg3[i+1]
; into %mac1.026. In parallel, sext(arg3[i]) is summed into %count. The
; function returns the sum of the two accumulators (0 when %arg <= 0).
; %arg1 is not referenced by the body.
;
; Little-endian expectations: each pair of halfwords is fetched with a single
; 32-bit `ldr`, both products are accumulated by one `smlad`, and the extra
; add is folded into `sxtah` on the same loaded word.
; Big-endian expectations: four separate `ldrsh` loads, two `smlabb`, and a
; plain `add`.
; NOTE(review): presumably this exercises the ARM ParallelDSP load-widening
; with multi-use loads (per the test's directory/file name) - confirm.
5define i32 @add_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture readonly %arg2, ptr nocapture readonly %arg3) {
6; CHECK-LE-LABEL: add_user:
7; CHECK-LE:       @ %bb.0: @ %entry
8; CHECK-LE-NEXT:    .save {r4, lr}
9; CHECK-LE-NEXT:    push {r4, lr}
10; CHECK-LE-NEXT:    cmp r0, #1
11; CHECK-LE-NEXT:    blt .LBB0_4
12; CHECK-LE-NEXT:  @ %bb.1: @ %for.body.preheader
13; CHECK-LE-NEXT:    sub.w lr, r3, #2
14; CHECK-LE-NEXT:    subs r2, #2
15; CHECK-LE-NEXT:    mov.w r12, #0
16; CHECK-LE-NEXT:    movs r1, #0
17; CHECK-LE-NEXT:  .LBB0_2: @ %for.body
18; CHECK-LE-NEXT:    @ =>This Inner Loop Header: Depth=1
19; CHECK-LE-NEXT:    ldr r3, [lr, #2]!
20; CHECK-LE-NEXT:    subs r0, #1
21; CHECK-LE-NEXT:    ldr r4, [r2, #2]!
22; CHECK-LE-NEXT:    sxtah r1, r1, r3
23; CHECK-LE-NEXT:    smlad r12, r4, r3, r12
24; CHECK-LE-NEXT:    bne .LBB0_2
25; CHECK-LE-NEXT:  @ %bb.3: @ %for.cond.cleanup
26; CHECK-LE-NEXT:    add.w r0, r12, r1
27; CHECK-LE-NEXT:    pop {r4, pc}
28; CHECK-LE-NEXT:  .LBB0_4:
29; CHECK-LE-NEXT:    mov.w r12, #0
30; CHECK-LE-NEXT:    movs r1, #0
31; CHECK-LE-NEXT:    add.w r0, r12, r1
32; CHECK-LE-NEXT:    pop {r4, pc}
33;
34; CHECK-BE-LABEL: add_user:
35; CHECK-BE:       @ %bb.0: @ %entry
36; CHECK-BE-NEXT:    .save {r4, r5, r7, lr}
37; CHECK-BE-NEXT:    push {r4, r5, r7, lr}
38; CHECK-BE-NEXT:    cmp r0, #1
39; CHECK-BE-NEXT:    blt .LBB0_4
40; CHECK-BE-NEXT:  @ %bb.1: @ %for.body.preheader
41; CHECK-BE-NEXT:    subs r3, #2
42; CHECK-BE-NEXT:    subs r2, #2
43; CHECK-BE-NEXT:    mov.w r12, #0
44; CHECK-BE-NEXT:    movs r1, #0
45; CHECK-BE-NEXT:  .LBB0_2: @ %for.body
46; CHECK-BE-NEXT:    @ =>This Inner Loop Header: Depth=1
47; CHECK-BE-NEXT:    ldrsh lr, [r3, #2]!
48; CHECK-BE-NEXT:    subs r0, #1
49; CHECK-BE-NEXT:    ldrsh r4, [r2, #2]!
50; CHECK-BE-NEXT:    add r1, lr
51; CHECK-BE-NEXT:    ldrsh.w r5, [r2, #2]
52; CHECK-BE-NEXT:    smlabb r12, r4, lr, r12
53; CHECK-BE-NEXT:    ldrsh.w r4, [r3, #2]
54; CHECK-BE-NEXT:    smlabb r12, r5, r4, r12
55; CHECK-BE-NEXT:    bne .LBB0_2
56; CHECK-BE-NEXT:  @ %bb.3: @ %for.cond.cleanup
57; CHECK-BE-NEXT:    add.w r0, r12, r1
58; CHECK-BE-NEXT:    pop {r4, r5, r7, pc}
59; CHECK-BE-NEXT:  .LBB0_4:
60; CHECK-BE-NEXT:    mov.w r12, #0
61; CHECK-BE-NEXT:    movs r1, #0
62; CHECK-BE-NEXT:    add.w r0, r12, r1
63; CHECK-BE-NEXT:    pop {r4, r5, r7, pc}
64entry:
65  %cmp24 = icmp sgt i32 %arg, 0
66  br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup
67
68for.body.preheader:
; Neither %.pre nor %.pre27 is referenced again in this function.
69  %.pre = load i16, ptr %arg3, align 2
70  %.pre27 = load i16, ptr %arg2, align 2
71  br label %for.body
72
73for.cond.cleanup:
74  %mac1.0.lcssa = phi i32 [ 0, %entry ], [ %add11, %for.body ]
75  %count.final = phi i32 [ 0, %entry ], [ %count.next, %for.body ]
76  %res = add i32 %mac1.0.lcssa, %count.final
77  ret i32 %res
78
79for.body:
80  %mac1.026 = phi i32 [ %add11, %for.body ], [ 0, %for.body.preheader ]
81  %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
82  %count = phi i32 [ %count.next, %for.body ], [ 0, %for.body.preheader ]
83  %arrayidx = getelementptr inbounds i16, ptr %arg3, i32 %i.025
84  %0 = load i16, ptr %arrayidx, align 2
85  %add = add nuw nsw i32 %i.025, 1
86  %arrayidx1 = getelementptr inbounds i16, ptr %arg3, i32 %add
87  %1 = load i16, ptr %arrayidx1, align 2
88  %arrayidx3 = getelementptr inbounds i16, ptr %arg2, i32 %i.025
89  %2 = load i16, ptr %arrayidx3, align 2
90  %conv = sext i16 %2 to i32
91  %conv4 = sext i16 %0 to i32
; Extra, non-multiply user of the sign-extended arg3[i] value.
92  %count.next = add i32 %conv4, %count
93  %mul = mul nsw i32 %conv, %conv4
94  %arrayidx6 = getelementptr inbounds i16, ptr %arg2, i32 %add
95  %3 = load i16, ptr %arrayidx6, align 2
96  %conv7 = sext i16 %3 to i32
97  %conv8 = sext i16 %1 to i32
98  %mul9 = mul nsw i32 %conv7, %conv8
99  %add10 = add i32 %mul, %mac1.026
100  %add11 = add i32 %mul9, %add10
101  %exitcond = icmp ne i32 %add, %arg
102  br i1 %exitcond, label %for.body, label %for.cond.cleanup
103}
104
; mul_bottom_user: same multiply-accumulate loop as the other functions in
; this file, but the extra user of the sign-extended arg3[i] value is a `mul`
; into the %count accumulator (%count.next = sext(arg3[i]) * %count).
;
; The function returns %add11 + %count.next from the final iteration, or 0
; when %arg <= 0. %arg1 is not referenced by the body.
;
; Little-endian expectations: one 32-bit `ldr` per operand pair feeding a
; single `smlad`; the extra multiply re-extracts the low halfword of the
; arg3 word with `sxth` before `mul`.
; Big-endian expectations: four separate `ldrsh` loads, two `smlabb`, and
; `mul` using the already sign-extended halfword.
105define i32 @mul_bottom_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture readonly %arg2, ptr nocapture readonly %arg3) {
106; CHECK-LE-LABEL: mul_bottom_user:
107; CHECK-LE:       @ %bb.0: @ %entry
108; CHECK-LE-NEXT:    .save {r4, lr}
109; CHECK-LE-NEXT:    push {r4, lr}
110; CHECK-LE-NEXT:    cmp r0, #1
111; CHECK-LE-NEXT:    blt .LBB1_4
112; CHECK-LE-NEXT:  @ %bb.1: @ %for.body.preheader
113; CHECK-LE-NEXT:    sub.w lr, r3, #2
114; CHECK-LE-NEXT:    subs r2, #2
115; CHECK-LE-NEXT:    mov.w r12, #0
116; CHECK-LE-NEXT:    movs r1, #0
117; CHECK-LE-NEXT:  .LBB1_2: @ %for.body
118; CHECK-LE-NEXT:    @ =>This Inner Loop Header: Depth=1
119; CHECK-LE-NEXT:    ldr r3, [lr, #2]!
120; CHECK-LE-NEXT:    subs r0, #1
121; CHECK-LE-NEXT:    ldr r4, [r2, #2]!
122; CHECK-LE-NEXT:    smlad r12, r4, r3, r12
123; CHECK-LE-NEXT:    sxth r3, r3
124; CHECK-LE-NEXT:    mul r1, r3, r1
125; CHECK-LE-NEXT:    bne .LBB1_2
126; CHECK-LE-NEXT:  @ %bb.3: @ %for.cond.cleanup
127; CHECK-LE-NEXT:    add.w r0, r12, r1
128; CHECK-LE-NEXT:    pop {r4, pc}
129; CHECK-LE-NEXT:  .LBB1_4:
130; CHECK-LE-NEXT:    mov.w r12, #0
131; CHECK-LE-NEXT:    movs r1, #0
132; CHECK-LE-NEXT:    add.w r0, r12, r1
133; CHECK-LE-NEXT:    pop {r4, pc}
134;
135; CHECK-BE-LABEL: mul_bottom_user:
136; CHECK-BE:       @ %bb.0: @ %entry
137; CHECK-BE-NEXT:    .save {r4, r5, r7, lr}
138; CHECK-BE-NEXT:    push {r4, r5, r7, lr}
139; CHECK-BE-NEXT:    cmp r0, #1
140; CHECK-BE-NEXT:    blt .LBB1_4
141; CHECK-BE-NEXT:  @ %bb.1: @ %for.body.preheader
142; CHECK-BE-NEXT:    subs r3, #2
143; CHECK-BE-NEXT:    subs r2, #2
144; CHECK-BE-NEXT:    mov.w r12, #0
145; CHECK-BE-NEXT:    movs r1, #0
146; CHECK-BE-NEXT:  .LBB1_2: @ %for.body
147; CHECK-BE-NEXT:    @ =>This Inner Loop Header: Depth=1
148; CHECK-BE-NEXT:    ldrsh lr, [r3, #2]!
149; CHECK-BE-NEXT:    subs r0, #1
150; CHECK-BE-NEXT:    ldrsh r4, [r2, #2]!
151; CHECK-BE-NEXT:    ldrsh.w r5, [r2, #2]
152; CHECK-BE-NEXT:    mul r1, lr, r1
153; CHECK-BE-NEXT:    smlabb r12, r4, lr, r12
154; CHECK-BE-NEXT:    ldrsh.w r4, [r3, #2]
155; CHECK-BE-NEXT:    smlabb r12, r5, r4, r12
156; CHECK-BE-NEXT:    bne .LBB1_2
157; CHECK-BE-NEXT:  @ %bb.3: @ %for.cond.cleanup
158; CHECK-BE-NEXT:    add.w r0, r12, r1
159; CHECK-BE-NEXT:    pop {r4, r5, r7, pc}
160; CHECK-BE-NEXT:  .LBB1_4:
161; CHECK-BE-NEXT:    mov.w r12, #0
162; CHECK-BE-NEXT:    movs r1, #0
163; CHECK-BE-NEXT:    add.w r0, r12, r1
164; CHECK-BE-NEXT:    pop {r4, r5, r7, pc}
165entry:
166  %cmp24 = icmp sgt i32 %arg, 0
167  br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup
168
169for.body.preheader:
; Neither %.pre nor %.pre27 is referenced again in this function.
170  %.pre = load i16, ptr %arg3, align 2
171  %.pre27 = load i16, ptr %arg2, align 2
172  br label %for.body
173
174for.cond.cleanup:
175  %mac1.0.lcssa = phi i32 [ 0, %entry ], [ %add11, %for.body ]
176  %count.final = phi i32 [ 0, %entry ], [ %count.next, %for.body ]
177  %res = add i32 %mac1.0.lcssa, %count.final
178  ret i32 %res
179
180for.body:
181  %mac1.026 = phi i32 [ %add11, %for.body ], [ 0, %for.body.preheader ]
182  %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
183  %count = phi i32 [ %count.next, %for.body ], [ 0, %for.body.preheader ]
184  %arrayidx = getelementptr inbounds i16, ptr %arg3, i32 %i.025
185  %0 = load i16, ptr %arrayidx, align 2
186  %add = add nuw nsw i32 %i.025, 1
187  %arrayidx1 = getelementptr inbounds i16, ptr %arg3, i32 %add
188  %1 = load i16, ptr %arrayidx1, align 2
189  %arrayidx3 = getelementptr inbounds i16, ptr %arg2, i32 %i.025
190  %2 = load i16, ptr %arrayidx3, align 2
191  %conv = sext i16 %2 to i32
192  %conv4 = sext i16 %0 to i32
193  %mul = mul nsw i32 %conv, %conv4
194  %arrayidx6 = getelementptr inbounds i16, ptr %arg2, i32 %add
195  %3 = load i16, ptr %arrayidx6, align 2
196  %conv7 = sext i16 %3 to i32
197  %conv8 = sext i16 %1 to i32
198  %mul9 = mul nsw i32 %conv7, %conv8
199  %add10 = add i32 %mul, %mac1.026
200  %add11 = add i32 %mul9, %add10
; Extra multiply user of the sign-extended arg3[i] value (the element at the
; lower index of the arg3 pair).
201  %count.next = mul i32 %conv4, %count
202  %exitcond = icmp ne i32 %add, %arg
203  br i1 %exitcond, label %for.body, label %for.cond.cleanup
204}
205
; mul_top_user: multiply-accumulate loop in which the extra user is a `mul`
; of the sign-extended arg2[i+1] value (the element at the higher index of
; the arg2 pair) into the %count accumulator.
;
; The function returns %add11 + %count.next from the final iteration, or 0
; when %arg <= 0. %arg1 is not referenced by the body.
;
; Little-endian expectations: one 32-bit `ldr` per operand pair feeding a
; single `smlad`; the extra multiply obtains the upper halfword of the arg2
; word with `asr.w ..., #16` before `mul`.
; Big-endian expectations: four separate `ldrsh` loads, two `smlabb`, and
; `mul` using the sign-extended halfword loaded from [r2, #2].
206define i32 @mul_top_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture readonly %arg2, ptr nocapture readonly %arg3) {
207; CHECK-LE-LABEL: mul_top_user:
208; CHECK-LE:       @ %bb.0: @ %entry
209; CHECK-LE-NEXT:    .save {r4, lr}
210; CHECK-LE-NEXT:    push {r4, lr}
211; CHECK-LE-NEXT:    cmp r0, #1
212; CHECK-LE-NEXT:    blt .LBB2_4
213; CHECK-LE-NEXT:  @ %bb.1: @ %for.body.preheader
214; CHECK-LE-NEXT:    subs r3, #2
215; CHECK-LE-NEXT:    subs r2, #2
216; CHECK-LE-NEXT:    mov.w r12, #0
217; CHECK-LE-NEXT:    movs r1, #0
218; CHECK-LE-NEXT:  .LBB2_2: @ %for.body
219; CHECK-LE-NEXT:    @ =>This Inner Loop Header: Depth=1
220; CHECK-LE-NEXT:    ldr lr, [r3, #2]!
221; CHECK-LE-NEXT:    subs r0, #1
222; CHECK-LE-NEXT:    ldr r4, [r2, #2]!
223; CHECK-LE-NEXT:    smlad r12, r4, lr, r12
224; CHECK-LE-NEXT:    asr.w r4, r4, #16
225; CHECK-LE-NEXT:    mul r1, r4, r1
226; CHECK-LE-NEXT:    bne .LBB2_2
227; CHECK-LE-NEXT:  @ %bb.3: @ %for.cond.cleanup
228; CHECK-LE-NEXT:    add.w r0, r12, r1
229; CHECK-LE-NEXT:    pop {r4, pc}
230; CHECK-LE-NEXT:  .LBB2_4:
231; CHECK-LE-NEXT:    mov.w r12, #0
232; CHECK-LE-NEXT:    movs r1, #0
233; CHECK-LE-NEXT:    add.w r0, r12, r1
234; CHECK-LE-NEXT:    pop {r4, pc}
235;
236; CHECK-BE-LABEL: mul_top_user:
237; CHECK-BE:       @ %bb.0: @ %entry
238; CHECK-BE-NEXT:    .save {r4, lr}
239; CHECK-BE-NEXT:    push {r4, lr}
240; CHECK-BE-NEXT:    cmp r0, #1
241; CHECK-BE-NEXT:    blt .LBB2_4
242; CHECK-BE-NEXT:  @ %bb.1: @ %for.body.preheader
243; CHECK-BE-NEXT:    subs r3, #2
244; CHECK-BE-NEXT:    subs r2, #2
245; CHECK-BE-NEXT:    mov.w r12, #0
246; CHECK-BE-NEXT:    movs r1, #0
247; CHECK-BE-NEXT:  .LBB2_2: @ %for.body
248; CHECK-BE-NEXT:    @ =>This Inner Loop Header: Depth=1
249; CHECK-BE-NEXT:    ldrsh lr, [r3, #2]!
250; CHECK-BE-NEXT:    subs r0, #1
251; CHECK-BE-NEXT:    ldrsh r4, [r2, #2]!
252; CHECK-BE-NEXT:    smlabb r12, r4, lr, r12
253; CHECK-BE-NEXT:    ldrsh.w r4, [r2, #2]
254; CHECK-BE-NEXT:    ldrsh.w lr, [r3, #2]
255; CHECK-BE-NEXT:    mul r1, r4, r1
256; CHECK-BE-NEXT:    smlabb r12, r4, lr, r12
257; CHECK-BE-NEXT:    bne .LBB2_2
258; CHECK-BE-NEXT:  @ %bb.3: @ %for.cond.cleanup
259; CHECK-BE-NEXT:    add.w r0, r12, r1
260; CHECK-BE-NEXT:    pop {r4, pc}
261; CHECK-BE-NEXT:  .LBB2_4:
262; CHECK-BE-NEXT:    mov.w r12, #0
263; CHECK-BE-NEXT:    movs r1, #0
264; CHECK-BE-NEXT:    add.w r0, r12, r1
265; CHECK-BE-NEXT:    pop {r4, pc}
266entry:
267  %cmp24 = icmp sgt i32 %arg, 0
268  br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup
269
270for.body.preheader:
; Neither %.pre nor %.pre27 is referenced again in this function.
271  %.pre = load i16, ptr %arg3, align 2
272  %.pre27 = load i16, ptr %arg2, align 2
273  br label %for.body
274
275for.cond.cleanup:
276  %mac1.0.lcssa = phi i32 [ 0, %entry ], [ %add11, %for.body ]
277  %count.final = phi i32 [ 0, %entry ], [ %count.next, %for.body ]
278  %res = add i32 %mac1.0.lcssa, %count.final
279  ret i32 %res
280
281for.body:
282  %mac1.026 = phi i32 [ %add11, %for.body ], [ 0, %for.body.preheader ]
283  %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
284  %count = phi i32 [ %count.next, %for.body ], [ 0, %for.body.preheader ]
285  %arrayidx = getelementptr inbounds i16, ptr %arg3, i32 %i.025
286  %0 = load i16, ptr %arrayidx, align 2
287  %add = add nuw nsw i32 %i.025, 1
288  %arrayidx1 = getelementptr inbounds i16, ptr %arg3, i32 %add
289  %1 = load i16, ptr %arrayidx1, align 2
290  %arrayidx3 = getelementptr inbounds i16, ptr %arg2, i32 %i.025
291  %2 = load i16, ptr %arrayidx3, align 2
292  %conv = sext i16 %2 to i32
293  %conv4 = sext i16 %0 to i32
294  %mul = mul nsw i32 %conv, %conv4
295  %arrayidx6 = getelementptr inbounds i16, ptr %arg2, i32 %add
296  %3 = load i16, ptr %arrayidx6, align 2
297  %conv7 = sext i16 %3 to i32
298  %conv8 = sext i16 %1 to i32
299  %mul9 = mul nsw i32 %conv7, %conv8
300  %add10 = add i32 %mul, %mac1.026
301  %add11 = add i32 %mul9, %add10
; Extra multiply user of the sign-extended arg2[i+1] value.
302  %count.next = mul i32 %conv7, %count
303  %exitcond = icmp ne i32 %add, %arg
304  br i1 %exitcond, label %for.body, label %for.cond.cleanup
305}
306
; and_user: multiply-accumulate loop in which the sign-extended arg3[i] value
; is additionally masked with 65535 (%bottom) and that masked value is
; multiplied into the %count accumulator.
;
; Unlike the earlier functions in this file, all four getelementptr
; instructions are placed before the remaining loads. The function returns
; %add11 + %count.next from the final iteration, or 0 when %arg <= 0.
; %arg1 is not referenced by the body.
;
; Little-endian expectations: one 32-bit `ldr` per operand pair feeding a
; single `smlad`; the masked value is produced with `uxth` on the arg3 word
; before `mul`.
; Big-endian expectations: the arg3[i] element is loaded zero-extended with
; `ldrh`, the other three elements with `ldrsh`, followed by `mul` and two
; `smlabb`.
307define i32 @and_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture readonly %arg2, ptr nocapture readonly %arg3) {
308; CHECK-LE-LABEL: and_user:
309; CHECK-LE:       @ %bb.0: @ %entry
310; CHECK-LE-NEXT:    .save {r4, lr}
311; CHECK-LE-NEXT:    push {r4, lr}
312; CHECK-LE-NEXT:    cmp r0, #1
313; CHECK-LE-NEXT:    blt .LBB3_4
314; CHECK-LE-NEXT:  @ %bb.1: @ %for.body.preheader
315; CHECK-LE-NEXT:    sub.w lr, r3, #2
316; CHECK-LE-NEXT:    subs r2, #2
317; CHECK-LE-NEXT:    mov.w r12, #0
318; CHECK-LE-NEXT:    movs r1, #0
319; CHECK-LE-NEXT:  .LBB3_2: @ %for.body
320; CHECK-LE-NEXT:    @ =>This Inner Loop Header: Depth=1
321; CHECK-LE-NEXT:    ldr r3, [lr, #2]!
322; CHECK-LE-NEXT:    subs r0, #1
323; CHECK-LE-NEXT:    ldr r4, [r2, #2]!
324; CHECK-LE-NEXT:    smlad r12, r4, r3, r12
325; CHECK-LE-NEXT:    uxth r3, r3
326; CHECK-LE-NEXT:    mul r1, r3, r1
327; CHECK-LE-NEXT:    bne .LBB3_2
328; CHECK-LE-NEXT:  @ %bb.3: @ %for.cond.cleanup
329; CHECK-LE-NEXT:    add.w r0, r12, r1
330; CHECK-LE-NEXT:    pop {r4, pc}
331; CHECK-LE-NEXT:  .LBB3_4:
332; CHECK-LE-NEXT:    mov.w r12, #0
333; CHECK-LE-NEXT:    movs r1, #0
334; CHECK-LE-NEXT:    add.w r0, r12, r1
335; CHECK-LE-NEXT:    pop {r4, pc}
336;
337; CHECK-BE-LABEL: and_user:
338; CHECK-BE:       @ %bb.0: @ %entry
339; CHECK-BE-NEXT:    .save {r4, r5, r7, lr}
340; CHECK-BE-NEXT:    push {r4, r5, r7, lr}
341; CHECK-BE-NEXT:    cmp r0, #1
342; CHECK-BE-NEXT:    blt .LBB3_4
343; CHECK-BE-NEXT:  @ %bb.1: @ %for.body.preheader
344; CHECK-BE-NEXT:    subs r3, #2
345; CHECK-BE-NEXT:    subs r2, #2
346; CHECK-BE-NEXT:    mov.w r12, #0
347; CHECK-BE-NEXT:    movs r1, #0
348; CHECK-BE-NEXT:  .LBB3_2: @ %for.body
349; CHECK-BE-NEXT:    @ =>This Inner Loop Header: Depth=1
350; CHECK-BE-NEXT:    ldrh lr, [r3, #2]!
351; CHECK-BE-NEXT:    subs r0, #1
352; CHECK-BE-NEXT:    ldrsh r4, [r2, #2]!
353; CHECK-BE-NEXT:    ldrsh.w r5, [r2, #2]
354; CHECK-BE-NEXT:    mul r1, lr, r1
355; CHECK-BE-NEXT:    smlabb r12, r4, lr, r12
356; CHECK-BE-NEXT:    ldrsh.w r4, [r3, #2]
357; CHECK-BE-NEXT:    smlabb r12, r5, r4, r12
358; CHECK-BE-NEXT:    bne .LBB3_2
359; CHECK-BE-NEXT:  @ %bb.3: @ %for.cond.cleanup
360; CHECK-BE-NEXT:    add.w r0, r12, r1
361; CHECK-BE-NEXT:    pop {r4, r5, r7, pc}
362; CHECK-BE-NEXT:  .LBB3_4:
363; CHECK-BE-NEXT:    mov.w r12, #0
364; CHECK-BE-NEXT:    movs r1, #0
365; CHECK-BE-NEXT:    add.w r0, r12, r1
366; CHECK-BE-NEXT:    pop {r4, r5, r7, pc}
367entry:
368  %cmp24 = icmp sgt i32 %arg, 0
369  br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup
370
371for.body.preheader:
; Neither %.pre nor %.pre27 is referenced again in this function.
372  %.pre = load i16, ptr %arg3, align 2
373  %.pre27 = load i16, ptr %arg2, align 2
374  br label %for.body
375
376for.cond.cleanup:
377  %mac1.0.lcssa = phi i32 [ 0, %entry ], [ %add11, %for.body ]
378  %count.final = phi i32 [ 0, %entry ], [ %count.next, %for.body ]
379  %res = add i32 %mac1.0.lcssa, %count.final
380  ret i32 %res
381
382for.body:
383  %mac1.026 = phi i32 [ %add11, %for.body ], [ 0, %for.body.preheader ]
384  %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
385  %count = phi i32 [ %count.next, %for.body ], [ 0, %for.body.preheader ]
386  %arrayidx = getelementptr inbounds i16, ptr %arg3, i32 %i.025
387  %0 = load i16, ptr %arrayidx, align 2
388  %add = add nuw nsw i32 %i.025, 1
389  %arrayidx1 = getelementptr inbounds i16, ptr %arg3, i32 %add
390  %arrayidx3 = getelementptr inbounds i16, ptr %arg2, i32 %i.025
391  %arrayidx6 = getelementptr inbounds i16, ptr %arg2, i32 %add
392  %1 = load i16, ptr %arrayidx1, align 2
393  %2 = load i16, ptr %arrayidx3, align 2
394  %conv = sext i16 %2 to i32
395  %conv4 = sext i16 %0 to i32
; Low 16 bits of the sign-extended arg3[i] value; the unmasked %conv4 still
; feeds the multiply-accumulate below.
396  %bottom = and i32 %conv4, 65535
397  %mul = mul nsw i32 %conv, %conv4
398  %3 = load i16, ptr %arrayidx6, align 2
399  %conv7 = sext i16 %3 to i32
400  %conv8 = sext i16 %1 to i32
401  %mul9 = mul nsw i32 %conv7, %conv8
402  %add10 = add i32 %mul, %mac1.026
403  %add11 = add i32 %mul9, %add10
404  %count.next = mul i32 %bottom, %count
405  %exitcond = icmp ne i32 %add, %arg
406  br i1 %exitcond, label %for.body, label %for.cond.cleanup
407}
408
; multi_uses: multiply-accumulate loop in which the sign-extended arg3[i]
; value (%conv4) has three users: the multiply-accumulate product, an
; `and ..., 65535` (%bottom) and a `shl ..., 16` (%shl). The second
; accumulator is updated as %count.next = (%bottom xor %count) * %shl.
;
; The function returns %add11 + %count.next from the final iteration, or 0
; when %arg <= 0. %arg1 is not referenced by the body.
;
; Little-endian expectations: one 32-bit `ldr` per operand pair feeding a
; single `smlad`, followed by `eor.w`, `mul` and `lsl.w ..., #16` for the
; second accumulator.
; Big-endian expectations: four separate `ldrsh` loads, two `smlabb`, then
; the same `eor.w` / `mul` / `lsl.w ..., #16` sequence.
409define i32 @multi_uses(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture readonly %arg2, ptr nocapture readonly %arg3) {
410; CHECK-LE-LABEL: multi_uses:
411; CHECK-LE:       @ %bb.0: @ %entry
412; CHECK-LE-NEXT:    .save {r4, lr}
413; CHECK-LE-NEXT:    push {r4, lr}
414; CHECK-LE-NEXT:    cmp r0, #1
415; CHECK-LE-NEXT:    blt .LBB4_4
416; CHECK-LE-NEXT:  @ %bb.1: @ %for.body.preheader
417; CHECK-LE-NEXT:    subs r3, #2
418; CHECK-LE-NEXT:    subs r2, #2
419; CHECK-LE-NEXT:    mov.w lr, #0
420; CHECK-LE-NEXT:    mov.w r12, #0
421; CHECK-LE-NEXT:  .LBB4_2: @ %for.body
422; CHECK-LE-NEXT:    @ =>This Inner Loop Header: Depth=1
423; CHECK-LE-NEXT:    ldr r1, [r3, #2]!
424; CHECK-LE-NEXT:    subs r0, #1
425; CHECK-LE-NEXT:    ldr r4, [r2, #2]!
426; CHECK-LE-NEXT:    smlad lr, r4, r1, lr
427; CHECK-LE-NEXT:    eor.w r4, r1, r12
428; CHECK-LE-NEXT:    mul r1, r4, r1
429; CHECK-LE-NEXT:    lsl.w r12, r1, #16
430; CHECK-LE-NEXT:    bne .LBB4_2
431; CHECK-LE-NEXT:  @ %bb.3: @ %for.cond.cleanup
432; CHECK-LE-NEXT:    add.w r0, lr, r12
433; CHECK-LE-NEXT:    pop {r4, pc}
434; CHECK-LE-NEXT:  .LBB4_4:
435; CHECK-LE-NEXT:    mov.w lr, #0
436; CHECK-LE-NEXT:    mov.w r12, #0
437; CHECK-LE-NEXT:    add.w r0, lr, r12
438; CHECK-LE-NEXT:    pop {r4, pc}
439;
440; CHECK-BE-LABEL: multi_uses:
441; CHECK-BE:       @ %bb.0: @ %entry
442; CHECK-BE-NEXT:    .save {r4, r5, r7, lr}
443; CHECK-BE-NEXT:    push {r4, r5, r7, lr}
444; CHECK-BE-NEXT:    cmp r0, #1
445; CHECK-BE-NEXT:    blt .LBB4_4
446; CHECK-BE-NEXT:  @ %bb.1: @ %for.body.preheader
447; CHECK-BE-NEXT:    subs r3, #2
448; CHECK-BE-NEXT:    subs r2, #2
449; CHECK-BE-NEXT:    mov.w r12, #0
450; CHECK-BE-NEXT:    mov.w lr, #0
451; CHECK-BE-NEXT:  .LBB4_2: @ %for.body
452; CHECK-BE-NEXT:    @ =>This Inner Loop Header: Depth=1
453; CHECK-BE-NEXT:    ldrsh r4, [r2, #2]!
454; CHECK-BE-NEXT:    subs r0, #1
455; CHECK-BE-NEXT:    ldrsh r1, [r3, #2]!
456; CHECK-BE-NEXT:    ldrsh.w r5, [r2, #2]
457; CHECK-BE-NEXT:    smlabb r12, r4, r1, r12
458; CHECK-BE-NEXT:    ldrsh.w r4, [r3, #2]
459; CHECK-BE-NEXT:    smlabb r12, r5, r4, r12
460; CHECK-BE-NEXT:    eor.w r5, r1, lr
461; CHECK-BE-NEXT:    mul r1, r5, r1
462; CHECK-BE-NEXT:    lsl.w lr, r1, #16
463; CHECK-BE-NEXT:    bne .LBB4_2
464; CHECK-BE-NEXT:  @ %bb.3: @ %for.cond.cleanup
465; CHECK-BE-NEXT:    add.w r0, r12, lr
466; CHECK-BE-NEXT:    pop {r4, r5, r7, pc}
467; CHECK-BE-NEXT:  .LBB4_4:
468; CHECK-BE-NEXT:    mov.w r12, #0
469; CHECK-BE-NEXT:    mov.w lr, #0
470; CHECK-BE-NEXT:    add.w r0, r12, lr
471; CHECK-BE-NEXT:    pop {r4, r5, r7, pc}
472entry:
473  %cmp24 = icmp sgt i32 %arg, 0
474  br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup
475
476for.body.preheader:
; Neither %.pre nor %.pre27 is referenced again in this function.
477  %.pre = load i16, ptr %arg3, align 2
478  %.pre27 = load i16, ptr %arg2, align 2
479  br label %for.body
480
481for.cond.cleanup:
482  %mac1.0.lcssa = phi i32 [ 0, %entry ], [ %add11, %for.body ]
483  %count.final = phi i32 [ 0, %entry ], [ %count.next, %for.body ]
484  %res = add i32 %mac1.0.lcssa, %count.final
485  ret i32 %res
486
487for.body:
488  %mac1.026 = phi i32 [ %add11, %for.body ], [ 0, %for.body.preheader ]
489  %i.025 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
490  %count = phi i32 [ %count.next, %for.body ], [ 0, %for.body.preheader ]
491  %arrayidx = getelementptr inbounds i16, ptr %arg3, i32 %i.025
492  %0 = load i16, ptr %arrayidx, align 2
493  %add = add nuw nsw i32 %i.025, 1
494  %arrayidx1 = getelementptr inbounds i16, ptr %arg3, i32 %add
495  %arrayidx3 = getelementptr inbounds i16, ptr %arg2, i32 %i.025
496  %arrayidx6 = getelementptr inbounds i16, ptr %arg2, i32 %add
497  %1 = load i16, ptr %arrayidx1, align 2
498  %2 = load i16, ptr %arrayidx3, align 2
499  %conv = sext i16 %2 to i32
500  %conv4 = sext i16 %0 to i32
; First extra user of %conv4: keep only its low 16 bits.
501  %bottom = and i32 %conv4, 65535
502  %mul = mul nsw i32 %conv, %conv4
503  %3 = load i16, ptr %arrayidx6, align 2
504  %conv7 = sext i16 %3 to i32
505  %conv8 = sext i16 %1 to i32
506  %mul9 = mul nsw i32 %conv7, %conv8
507  %add10 = add i32 %mul, %mac1.026
; Second extra user of %conv4: shift it into the upper half of the word.
508  %shl = shl i32 %conv4, 16
509  %add11 = add i32 %mul9, %add10
510  %xor = xor i32 %bottom, %count
511  %count.next = mul i32 %xor, %shl
512  %exitcond = icmp ne i32 %add, %arg
513  br i1 %exitcond, label %for.body, label %for.cond.cleanup
514}
515